1 /* $Id: wblast2.c,v 1.40 2007/03/30 14:53:07 madden Exp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: $RCSfile: wblast2.c,v $
27 *
28 * Initial Creation Date: 10/23/2000
29 *
30 * $Revision: 1.40 $
31 *
32 * File Description:
33 * BLAST 2 Sequences CGI program
34 *
35 * $Log: wblast2.c,v $
36 * Revision 1.40 2007/03/30 14:53:07 madden
37 * Select top strand by default
38 *
39 * Revision 1.39 2006/07/05 15:59:21 jianye
40 * limit the defline length to 512
41 *
42 * Revision 1.38 2006/06/07 23:44:48 jianye
43 * fix short identical user id collision
44 *
45 * Revision 1.37 2006/04/26 14:48:23 jianye
46 * force printing out accession in defline
47 *
48 * Revision 1.36 2006/04/26 12:49:08 madden
49 * fix typo
50 *
51 * Revision 1.35 2006/04/25 17:33:51 jianye
52 * fetch from nr first for protein sequence
53 *
54 * Revision 1.34 2006/03/20 17:04:51 jianye
55 * use gi for query if possible
56 *
57 * Revision 1.33 2006/03/07 17:06:30 jianye
58 * correct mask char
59 *
60 * Revision 1.32 2006/01/17 21:47:28 madden
61 * Adjust to change in BLAST_TwoSeqLocSets prototype
62 *
63 * Revision 1.31 2006/01/05 17:28:07 jianye
64 * use new formatter for internal use
65 *
66 * Revision 1.30 2005/12/05 16:14:48 coulouri
67 * c99 variable declaration breaks irix and solaris compilers
68 *
69 * Revision 1.29 2005/10/21 14:12:37 zaretska
70 * Added code to treat NCBI_SEQID seq type the same way as ACCESSION_GI to fix bug for Blast2Seq when called from 'Search trace archive...'
71 *
72 * Revision 1.28 2005/10/19 15:27:16 zaretska
* Fixed disappearing version number in accession. Strip carriage return from accession/gi entered in GUI.
74 *
75 * Revision 1.27 2005/10/04 15:55:21 coulouri
76 * correct broken urls
77 *
78 * Revision 1.26 2005/09/07 14:21:41 coulouri
79 * correct broken link
80 *
81 * Revision 1.25 2005/08/19 15:53:50 coulouri
82 * correct grammar
83 *
84 * Revision 1.24 2005/07/29 22:19:30 dondosha
85 * If no Entrez client case, treat all sequences as FASTA
86 *
87 * Revision 1.23 2005/07/28 16:37:49 coulouri
88 * remove dead code
89 *
90 * Revision 1.22 2005/05/16 14:36:59 coulouri
91 * delayed declarations are not allowed in c
92 *
93 * Revision 1.21 2005/05/04 17:29:40 zaretska
94 * Combined accession/GI and sequence input in one entry box. Added checking for error after BLAST_TwoSeqLocSets call
95 *
96 * Revision 1.20 2005/04/11 19:14:37 dondosha
97 * Changed blast_form.map to IMG USEMAP for standalone version
98 *
99 * Revision 1.19 2005/04/04 15:16:01 dondosha
100 * If search returned error, display main page with an error and exit
101 *
102 * Revision 1.18 2005/03/15 21:36:42 dondosha
103 * Rolled back last change, because Bioseq retrieved from BLAST database does not have feature information
104 *
105 * Revision 1.16 2005/01/18 17:13:36 dondosha
106 * Set hint to eNone, so blastn is always used if megablast checkbox is not checked; fixed memory leaks; fixed uninitialized variable bug
107 *
108 * Revision 1.15 2004/10/26 15:51:35 dondosha
109 * Removed unsupported matrices
110 *
111 * Revision 1.14 2004/10/12 20:49:55 dondosha
112 * Correction due to change in SummaryReturn structure name and contents
113 *
114 * Revision 1.13 2004/08/16 19:39:35 dondosha
115 * Use CreateMaskByteStore function from txalign.h; fixed bug in masking filtered locations for translated queries
116 *
117 * Revision 1.12 2004/08/09 15:40:51 dondosha
* Propagated change in prototype for BLAST_TwoSeqLocSets
119 *
120 * Revision 1.11 2004/06/28 16:11:17 dondosha
121 * Fixed constant name
122 *
123 * Revision 1.10 2004/06/24 21:17:07 dondosha
124 * Boolean argument in ScoreAndEvalueToBuffers changed to Uint1
125 *
126 * Revision 1.9 2004/05/14 17:25:07 dondosha
127 * Allow use of new BLAST engine
128 *
129 * Revision 1.8 2003/12/19 18:12:37 coulouri
130 * fix name collision in aix
131 *
132 * Revision 1.7 2003/07/14 18:43:17 dondosha
133 * Changed Entrez references syntax
134 *
135 * Revision 1.6 2003/07/01 20:32:59 dondosha
136 * Fix in javascript for matrix and strand menus when program is changed
137 *
138 * Revision 1.5 2003/07/01 16:33:55 dondosha
139 * Changed logging; corrected CPU limit handling for Linux
140 *
141 * Revision 1.4 2003/05/09 22:12:25 dondosha
142 * Corrections for Darwin compilation
143 *
144 * Revision 1.3 2003/05/09 21:09:04 dondosha
145 * Removed unused variables
146 *
147 * Revision 1.2 2002/12/02 22:40:19 dondosha
148 * Changed uint64_t to a portable Uint8
149 *
150 * Revision 1.1 2002/12/02 18:05:53 dondosha
151 * Moved from different locations to a common one
152 *
153 * Revision 6.34 2002/12/02 17:58:30 dondosha
154 * No logging or connection thread for standalone WWW server version
155 *
156 * Revision 6.33 2002/10/04 21:24:55 dondosha
157 * 1. Added logging;
158 * 2. Added user disconnect handling: SIGPIPE, plus a poll thread to detect disconnection while nothing is yet being returned to the browser.
159 *
160 * Revision 6.32 2002/09/24 15:48:59 dondosha
161 * Changed path to standalone executables ftp site
162 *
163 * Revision 6.31 2002/09/04 20:13:37 dondosha
164 * Corrected the order of memory deallocation before exit
165 *
166 * Revision 6.30 2002/08/05 21:54:02 dondosha
167 * Added an #ifdef for QBLAST related stuff
168 *
169 * Revision 6.29 2002/06/18 22:18:31 dondosha
170 * Typo correction
171 *
172 * Revision 6.28 2002/05/09 15:38:53 dondosha
173 * Call BLASTOptionNewEx instead of BLASTOptionNew, so megablast defaults are set in a central place
174 *
175 * Revision 6.27 2002/05/07 14:53:56 dondosha
176 * Copying bioseq was wrong - loses protein link for CDS translation; need only to pack the sequence itself
177 *
178 * Revision 6.26 2002/05/02 22:40:18 dondosha
179 * Copy bioseqs if retrieved from ID1, removing non-residues from sequence
180 *
181 * Revision 6.25 2002/04/24 19:24:17 dondosha
182 * Use QBlast API to retrieve sequences when RID is provided in the URL
183 *
184 * Revision 6.24 2002/03/19 23:31:06 dondosha
185 * Do not increment options->wordsize by 4 for megablast any more
186 *
187 * Revision 6.23 2002/03/18 18:08:52 dondosha
188 * 1. Cleaned up code differentiating between NCBI and 'standalone' versions
189 * 2. Added DBLEN URL parameter to specify effective database length
190 * 3. Added RID URL parameter to allow fetching sequences from BLAST Queue
191 *
192 * Revision 6.22 2002/01/09 23:26:57 dondosha
193 * Change effective 12/27: get nr database size possibly using alias file
194 *
195 * Revision 6.21 2001/10/03 19:39:11 dondosha
196 * Tiny correction in formatting of minus strand alignments
197 *
198 * Revision 6.20 2001/07/31 16:47:39 dondosha
199 * Function FastaCheckDna made public for use in Web BLAST
200 *
201 * Revision 6.19 2001/07/27 16:06:42 dondosha
202 * Roll back use of fake subject id, instead use SeqIdFindBest function
203 *
204 * Revision 6.18 2001/07/25 21:33:29 dondosha
205 * Use fake subject Bioseq
206 *
207 * Revision 6.17 2001/07/23 20:22:02 dondosha
208 * Corrected score data computation when query is masked
209 *
210 * Revision 6.16 2001/03/14 16:43:11 dondosha
211 * Minor correction
212 *
213 * Revision 6.15 2001/03/02 20:48:55 dondosha
214 * Cosmetic changes suggested by Roma Tatusov
215 *
216 * Revision 6.14 2001/02/26 21:32:09 dondosha
217 * Do not remove non-alphanumeric characters from sequence in FastaCheckDna
218 *
219 * Revision 6.13 2001/02/16 20:21:01 dondosha
220 * 1. Added a strand option menu
221 * 2. Fixed bug in sequence id manipulation
222 *
223 * Revision 6.12 2001/01/09 21:01:20 dondosha
224 * For megablast, subtract 4 from wordsize when displaying next page
225 *
226 * Revision 6.11 2001/01/04 18:28:28 dondosha
227 * For tblastx, split seqalign into a linked list of seqaligns
228 *
229 * Revision 6.10 2000/12/21 15:57:32 dondosha
230 * Test chptr for being NULL
231 *
232 * Revision 6.9 2000/12/07 17:58:04 dondosha
233 * Enabled Mega BLAST extension choice for blastn program
234 *
235 * Revision 6.8 2000/11/22 19:35:11 dondosha
236 * Minor fixes of purify errors and memory leaks
237 *
238 * Revision 6.7 2000/11/16 23:27:50 dondosha
239 * Minor corrections from 11/07/2000
240 *
241 * Revision 6.6 2000/11/03 22:20:42 shavirin
242 * Added return value from Main() function.
243 *
244 * Revision 6.5 2000/11/03 20:46:41 dondosha
245 * A few bug fixes
246 *
247 * Revision 6.4 2000/11/03 16:39:17 shavirin
248 * Added alternative formating for Standalone WWW Blast server.
249 *
250 * Revision 6.3 2000/11/02 21:45:41 dondosha
251 * Removed printing content from code - should be done in the cgi script
252 *
253 * Revision 6.2 2000/11/02 20:51:00 dondosha
254 * Significant clean up of the code
255 *
256 * Revision 6.1 2000/10/31 19:37:31 dondosha
257 * Changed revision to 6.1
258 *
259 * Revision 1.4 2000/10/31 19:33:49 dondosha
260 * Changed gif images paths to current directory
261 *
262 * Revision 1.2 2000/10/23 21:22:31 dondosha
263 * Turned on CVS logging
264 *
265 * Revision 6.1
266 * Source code for the Blast 2 Sequences Web page
267 *
268 * ==========================================================================
269 */
270
271 #include <ncbi.h>
272 #include <tofasta.h>
273 #include <wwwblast.h>
274 #include <id1arch.h>
275 #include <txalign.h>
276
277 /* For rlimit stuff. */
278 #if defined(OS_UNIX)
279 #include <sys/resource.h>
280 #endif
281 #include <signal.h>
282 #include <stdarg.h>
283 #ifndef BL2SEQ_STANDALONE
284 #include <poll.h>
285 #endif
286 #include <time.h>
287 #ifdef NCBI_INTERNAL_NEW_FORMATTER
288 #include <showalignwrap.h>
289 #endif
290 #include <blastpat.h>
291 #ifndef BL2SEQ_STANDALONE
292 #include <Liburlapi/qblastnet.h>
293 #endif
294
295 #ifndef USE_OLD_BLAST
296 #include <algo/blast/api/twoseq_api.h>
297 #endif
298
299 #define MY_BLOSUM62 0
300 #define MY_PAM30 1
301 #define MY_PAM70 2
302 #define MY_BLOSUM45 3
303 #define MY_BLOSUM80 4
304
305 #define NR_SIZE_NA 2385885539
306 #define NR_SIZE_AA 181542687
307 #define DEFLINE_LENGTH 512
308 Char defline_buf[DEFLINE_LENGTH+4];
309
310 typedef struct prym{
311 Int2 len;
312 Int2 color;
313 CharPtr sym;
314 Boolean noleft, noright;
315 } Prym, PNTR PrymPtr;
316
317
/* Write the page's <SCRIPT> element to stdout.
 *
 * The script defines the client-side handlers of the "bl2" form:
 *   chan(a)             - toggles gap/match defaults on an on/off control
 *   chan_prog(a)        - resets defaults and the matrix/strand menus when
 *                         the program selection changes
 *   update_mtrx(a)      - sets gap costs for the selected matrix
 *   clear_sequence()    - empties all sequence and range inputs
 *   megablast_update(a) - switches defaults when the megablast box changes
 *
 * The text is kept as a table of short literals (none contains a '%'
 * conversion) and is emitted verbatim, one entry after another.
 */
static void JavaScriptFun()
{
    static const char *const script_text[] = {
        "<SCRIPT LANGUAGE=\"Javascript\">\n",

        "<!-- HIDE\n",
        "function chan(a)\n",
        "{\n",
        " if (a.value == 'off') {\n",
        " document.bl2.gopen.value=11;\n",
        " document.bl2.gext.value=1;\n",
        " document.bl2.match.value=\"\";\n",
        " document.bl2.msmatch.value=\"\";\n",
        " a.value = 'on';\n",
        " } else {\n",
        " document.bl2.gopen.value=5;\n",
        " document.bl2.gext.value=2;\n",
        " document.bl2.match.value=\"1\";\n",
        " document.bl2.msmatch.value=\"-2\";\n",
        " a.value = 'off';\n",
        " }\n",
        "}\n",

        "function chan_prog(a)\n",
        "{\n",
        " if (a.selectedIndex == 0) {\n",
        " document.bl2.gopen.value = 5;\n",
        " document.bl2.gext.value = 2;\n",
        " document.bl2.match.value=1;\n",
        " document.bl2.msmatch.value=\"-2\";\n",
        " document.bl2.word.value=\"11\";\n",
        " document.bl2.matrix.options.length = 1;\n",
        " document.bl2.matrix.options[0] = null;\n",
        " document.bl2.matrix.options[0] = new Option('Not Applicable');\n",
        " document.bl2.matrix.options[0].text='Not Applicable';\n",
        " document.bl2.matrix.options[0].value=-1;\n",
        " document.bl2.strand.options.length = 4;\n",
        " document.bl2.strand.options[0] = null;\n",
        " document.bl2.strand.options[0] = new Option('Both strands');\n",
        " document.bl2.strand.options[0].text='Both strands';\n",
        " document.bl2.strand.options[0].value=3;\n",
        " document.bl2.strand.options[0].selected=true;\n",
        " document.bl2.strand.options[1].text='Top strand';\n",
        " document.bl2.strand.options[1].value=1;\n",
        " document.bl2.strand.options[2].text='Reverse strand';\n",
        " document.bl2.strand.options[2].value=2;\n",
        " } else if (a.selectedIndex >= 1) {\n",
        " document.bl2.matrix.options.length = 6;\n",
        " document.bl2.matrix.options[0] = null;\n",
        " document.bl2.matrix.options[0] = new Option('BLOSUM62');\n",
        " document.bl2.matrix.options[0].selected = true;\n",
        " document.bl2.matrix.options[0].text='BLOSUM62';\n",
        " document.bl2.matrix.options[0].value=0;\n",
        " document.bl2.matrix.options[1].text='PAM30';\n",
        " document.bl2.matrix.options[1].value=1;\n",
        " document.bl2.matrix.options[2].text='PAM70';\n",
        " document.bl2.matrix.options[2].value=2;\n",
        " document.bl2.matrix.options[3].text='BLOSUM45';\n",
        " document.bl2.matrix.options[3].value=3;\n",
        " document.bl2.matrix.options[4].text='BLOSUM80';\n",
        " document.bl2.matrix.options[4].value=4;\n",
        " document.bl2.word.value=\"3\";\n",
        " if (document.bl2.matrix.selectedIndex == 1) {\n",
        " document.bl2.gopen.value = 9;\n",
        " document.bl2.gext.value = 1;\n",
        " } else if (document.bl2.matrix.selectedIndex == 2) {\n",
        " document.bl2.gopen.value = 10;\n",
        " document.bl2.gext.value = 1;\n",
        " } else if (document.bl2.matrix.selectedIndex == 3) {\n",
        " document.bl2.gopen.value = 15;\n",
        " document.bl2.gext.value = 2;\n",
        " } else if (document.bl2.matrix.selectedIndex == 4) {\n",
        " document.bl2.gopen.value =10;\n",
        " document.bl2.gext.value = 1;\n",
        " } else {\n",
        " document.bl2.gopen.value = 11;\n",
        " document.bl2.gext.value = 1;\n",
        " }\n",
        " document.bl2.match.value=\"\";\n",
        " document.bl2.msmatch.value=\"\";\n",
        " document.bl2.megablast.checked = 0;\n",
        " document.bl2.strand.options.length = 1;\n",
        " document.bl2.strand.options[0] = null;\n",
        " document.bl2.strand.options[0] = new Option('Not Applicable');\n",
        " document.bl2.strand.options[0].selected = true;\n",
        " document.bl2.strand.options[0].text='Not Applicable';\n",
        " document.bl2.strand.options[0].value=0;\n",
        " }\n",
        "}\n",

        "function update_mtrx(a)\n",
        "{\n",
        " if (document.bl2.program.selectedIndex == 0) {\n",
        " return;\n",
        " }\n",
        " document.bl2.word.value = 3;\n",
        " if (a.selectedIndex == 0) {\n",
        " document.bl2.gopen.value = 11;\n",
        " document.bl2.gext.value = 1;\n",
        " } else if (a.selectedIndex == 1) {\n",
        " document.bl2.gopen.value = 9;\n",
        " document.bl2.gext.value = 1;\n",
        " } else if (a.selectedIndex == 2) {\n",
        " document.bl2.gopen.value = 10;\n",
        " document.bl2.gext.value = 1;\n",
        " } else if (a.selectedIndex == 3) {\n",
        " document.bl2.gopen.value = 14;\n",
        " document.bl2.gext.value = 2;\n",
        " } else if (a.selectedIndex == 4) {\n",
        " document.bl2.gopen.value = 10;\n",
        " document.bl2.gext.value = 1;\n",
        " } else if (a.selectedIndex == 5) {\n",
        " document.bl2.gopen.value = 13;\n",
        " document.bl2.gext.value = 2;\n",
        " }\n",
        "}\n",

        /* Function clear_sequence() */
        "function clear_sequence() {\n",
        " document.bl2.seqfile1.value=''\n",
        " document.bl2.seqfile2.value=''\n",
        " document.bl2.sseq.value=''\n",
        " document.bl2.seq.value=''\n",
        " document.bl2.to.value=''\n",
        " document.bl2.tto.value=''\n",
        " document.bl2.from.value=''\n",
        " document.bl2.ffrom.value=''\n",
        " document.bl2.seq.focus()\n",
        "}\n",

        /* Function megablast_update(a) */
        "function megablast_update(a)\n",
        "{\n",
        " if (a.checked == 0) {\n",
        " document.bl2.word.value = 11;\n",
        " document.bl2.gopen.value = 5;\n",
        " document.bl2.gext.value = 2;\n",
        " } else {\n",
        " if (document.bl2.program.selectedIndex != 0) {\n",
        " document.bl2.program.selectedIndex = 0;\n",
        " chan_prog(document.bl2.program);\n",
        " }\n",
        " document.bl2.word.value = 28;\n",
        " document.bl2.gopen.value = \"\";\n",
        " document.bl2.gext.value = \"\";\n",
        " }\n",
        "}\n",

        "// -->\n",

        "</SCRIPT>\n"
    };
    const size_t entry_count = sizeof(script_text) / sizeof(script_text[0]);
    size_t entry;

    for (entry = 0; entry < entry_count; entry++) {
        fputs(script_text[entry], stdout);
    }
}
470
/* Append the annotation chain `next` to the end of the chain `head`.
 *
 * Returns `next` when `head` is NULL (nothing to append to); otherwise
 * walks to the last node of `head`, links `next` after it and returns
 * `head`. No nodes are allocated or freed.
 */
static SeqAnnotPtr tie_next_annot(SeqAnnotPtr head, SeqAnnotPtr next)
{
    SeqAnnotPtr tail = head;

    if (tail == NULL)
        return next;

    /* advance to the last node of the existing chain */
    while (tail->next != NULL)
        tail = tail->next;

    tail->next = next;
    return head;
}
484
485 #ifndef BL2SEQ_STANDALONE/* No logging for the standalone WWW Server version */
486 typedef struct LogInfo {
487 CharPtr filename;
488 time_t time_start;
489 clock_t cpu_time;
490 Int4 pid;
491 CharPtr program;
492 Int4 q_length;
493 Int4 s_length;
494 CharPtr user_IP;
495 Int4 size;
496 } LogInfo;
497
498 static LogInfo loginfo;
499
LogInfoFree()500 static void LogInfoFree()
501 {
502 MemFree(loginfo.filename);
503 MemFree(loginfo.program);
504 MemFree(loginfo.user_IP);
505 }
506
507 #endif
508
509 #ifndef BL2SEQ_STANDALONE/* No logging for the standalone WWW Server version */
logmsg(int status)510 void logmsg(int status)
511 {
512 FILE *logfp;
513
514 if ((logfp = FileOpen(loginfo.filename, "a")) != NULL) {
515 if (loginfo.time_start == 0) {
516 loginfo.cpu_time = clock();
517 loginfo.time_start = time(NULL);
518 fprintf(logfp, "%ld|%d|%s|%ld|%ld|%s|start\n", loginfo.time_start,
519 loginfo.pid, loginfo.program, loginfo.q_length,
520 loginfo.s_length, loginfo.user_IP);
521 } else {
522 struct tm tt;
523 char tmstamp[128];
524 memcpy(&tt, localtime(&loginfo.time_start), sizeof(tt));
525 strftime(tmstamp, sizeof(tmstamp), "%Y-%m-%d %T", &tt);
526 loginfo.cpu_time = (clock() - loginfo.cpu_time) / CLOCKS_PER_SEC;
527
528 fprintf(logfp, "%ld|%s|%ld|%d|%s|%ld|%ld|%s|done|%d|%ld|%ld\n",
529 loginfo.time_start, tmstamp, loginfo.cpu_time, loginfo.pid,
530 loginfo.program, loginfo.q_length, loginfo.s_length,
531 loginfo.user_IP, status, time(NULL) - loginfo.time_start,
532 loginfo.size);
533 }
534 FileClose(logfp);
535 }
536 }
537 #endif
538
539 /* Controls the state of the signal handling thread */
540 static Boolean run_status;
541 /* Flag to indicate which signals were received */
542 static Uint8 sigflag = 0;
543 /* set of signal masks */
544 static Uint8 wb2_sigmask[64];
545
546 #define WBLAST2_SEARCH 0
547 #define WBLAST2_FORMAT 1
548 #define WBLAST2_DONE 2
549
550 /** precompute individual sigmasks */
PrepareSigmask(void)551 void PrepareSigmask(void)
552 {
553 int ii;
554 for( ii = 0; ii < 64; ii++ ) {
555 wb2_sigmask[ii] = 1 << ii;
556 }
557 }
558
559 /** */
GetSignal()560 static int GetSignal()
561 {
562 int ii;
563 for( ii = 0; ii < 64; ii++ ) {
564 if( sigflag & wb2_sigmask[ii] ) {
565 break;
566 }
567 }
568
569 return ii;
570 }
571
572 /** set signal flag */
sighandler(int sig)573 static void sighandler(int sig)
574 {
575 sigflag |= wb2_sigmask[sig];
576 }
577
578
Blast2SeqMainPage(CharPtr warning,CharPtr seq1,CharPtr seq2,CharPtr one,CharPtr two,ValNodePtr error,Boolean is_prot,BLAST_OptionsBlkPtr options,Int2 mtrx,Int4 from,Int4 to,Int4 ffrom,Int4 tto,Int2 filter,Int2 pagecount)579 static void Blast2SeqMainPage(CharPtr warning, CharPtr seq1, CharPtr seq2, CharPtr one, CharPtr two, ValNodePtr error, Boolean is_prot, BLAST_OptionsBlkPtr options, Int2 mtrx, Int4 from, Int4 to, Int4 ffrom, Int4 tto, Int2 filter, Int2 pagecount) {
580 /*****************************************************************
581 0 - no sequences
582 1 - nucleotide sequences in protein alignment
583 2 - non-nucleotide sequences in nucleotide alignment
584 4 - BLAST Options error mesg handling
585 5 - invalid location
586 6 - No alignment found
587 *****************************************************************/
588 ValNodePtr vnp;
589 BlastErrorMsgPtr error_msg;
590 Int4Ptr dgopen=NULL, dgext=NULL;
591 Uint1 prog_number;
592
593 if (options)
594 prog_number = BlastGetProgramNumber(options->program_name);
595 else
596 prog_number = 1;
597
598 switch (mtrx) {
599 case MY_PAM30:
600 BlastKarlinGetMatrixValues("PAM30",
601 &dgopen, &dgext, NULL, NULL, NULL, NULL);
602 break;
603 case MY_PAM70:
604 BlastKarlinGetMatrixValues("MY_PAM70",
605 &dgopen, &dgext, NULL, NULL, NULL, NULL);
606 break;
607 case MY_BLOSUM80:
608 BlastKarlinGetMatrixValues("MY_BLOSUM80",
609 &dgopen, &dgext, NULL, NULL, NULL, NULL);
610 break;
611 case MY_BLOSUM45:
612 BlastKarlinGetMatrixValues("MY_BLOSUM45",
613 &dgopen, &dgext, NULL, NULL, NULL, NULL);
614 break;
615 case MY_BLOSUM62:
616 default:
617 BlastKarlinGetMatrixValues("MY_BLOSUM62",
618 &dgopen, &dgext, NULL, NULL, NULL, NULL);
619 break;
620 }
621
622 printf("<html>\n");
623 printf("<head>\n");
624 printf("<title>Blast 2 Sequences</title>\n");
625 printf("<META NAME=\"keywords\" CONTENT=\"NCBI, BLAST, ORF, Bioinformatics\">\n");
626 #ifndef BL2SEQ_STANDALONE
627 printf("<link rel=\"stylesheet\" href=\"http://www.ncbi.nlm.nih.gov/ncbi.css\">\n");
628 #endif
629 JavaScriptFun();
630 printf("</HEAD>\n");
631
632 #ifndef BL2SEQ_STANDALONE
633 printf("<body bgcolor=\"#f0f0fe\">\n");
634 if (options) {
635 printf("<table border=0 width=600 cellspacing=0 cellpadding=0>\n");
636 printf("<tr valign=center> \n");
637 printf("<td width=140><A HREF=\"http://www.ncbi.nlm.nih.gov\"><img src=\"http://www.ncbi.nlm.nih.gov/corehtml/left.GIF\" width=130 height=45 border=0 ALT=\"NCBI logo\"></A></td>\n");
638 printf("<td width=460 ><h1>Blast 2 Sequences</h1></td>\n");
639 printf("</tr>\n");
640 printf("</table>\n");
641
642 printf("<!-- the quicklinks bar--> \n");
643 printf("<table border=0 width=600 cellspacing=0 cellpadding=1 bgcolor=#003366>\n");
644 printf("<tr align=\"center\">\n");
645 printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed\" class=\"BAR\"><FONT COLOR=#FFFFFF>PubMed</FONT></a></td>\n");
646 printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/Entrez/\" class=\"BAR\"><FONT COLOR=#FFFFFF>Entrez</FONT></a></td>\n");
647 printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/blast/\" class=\"BAR\"><FONT COLOR=#FFFFFF>BLAST</FONT></a></td>\n");
648 printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/omim/\" class=\"BAR\"><FONT COLOR=#FFFFFF>OMIM</FONT></a></td>\n");
649 printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html\" class=\"BAR\"><FONT COLOR=#FFFFFF>Taxonomy</FONT></a></td>\n");
650 printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/Structure/\" class=\"BAR\"><FONT COLOR=#FFFFFF>Structure</FONT></a></td>\n");
651 printf("</tr></table>\n");
652 } else {
653 printf("<CENTER><TABLE CELLSPACING=2 CELLPADDING=2 WIDTH=100%%>\n");
654 printf("<TR ALIGN=CENTER BGCOLOR=#8dc7cc BGCOLOR=#0000ff><TD><B><I>\n");
655 printf("<A HREF=http://www.ncbi.nlm.nih.gov/><font color=#ffffff>NCBI</font></A></I></B></TD>\n");
656 printf("<TD><B><I>\n");
657 printf("<A HREF=http://www.ncbi.nlm.nih.gov/Entrez><font color=#ffffff>Entrez</font></A>\n");
658 printf("</I></B></TD>\n");
659 printf("<TD><B><I><A HREF=wblast2.cgi><font color=#ffffff>BLAST 2 sequences</font></A></I></B></TD>\n");
660 printf("<TD><B><I><A HREF=http://www.ncbi.nlm.nih.gov/blast><font color=#ffffff>BLAST</font></A></I></B></TD>\n");
661 printf("<TD><B><I><A HREF=example.html><font color=#ffffff>Example</font></A></I></B></TD>\n");
662 printf("<TD><B><I>\n");
663 printf("<A HREF=http://www.ncbi.nlm.nih.gov/blast/blast_help.html><font color=#ffffff>Help</font></A>\n");
664 printf("</I></B></TD></TR></TABLE></CENTER>\n");
665 }
666 printf("<H2><CENTER><font color=#0000ff> BLAST 2 SEQUENCES</font></H2></CENTER>\n");
667 printf("<FORM NAME=\"bl2\" method=\"post\" action=\"wblast2.cgi?%d\" enctype=\"multipart/form-data\">\n", pagecount);
668 #else /* defined BL2SEQ_STANDALONE */
669 printf("<BODY BGCOLOR=\"#F0F0FE\" LINK=\"#0000FF\" "
670 "VLINK=\"#660099\" ALINK=\"#660099\">\n");
671 printf("<map name=img_map>\n");
672 printf("<area shape=rect coords=2,1,48,21 "
673 "href=\"http://www.ncbi.nlm.nih.gov\">\n");
674 printf("<area shape=rect coords=385,1,435,21 "
675 "href=\"index.html\">\n");
676 printf("<area shape=rect coords=436,1,486,21 "
677 "href=\"http://www.ncbi.nlm.nih.gov/Entrez/\">\n");
678 printf("<area shape=rect coords=487,1,508,21 "
679 "href=\"docs/blast_help.html\">\n");
680 printf("</map>\n");
681 printf("<IMG USEMAP=#img_map WIDTH=509 HEIGHT=22 "
682 "SRC=\"images/bl2seq.gif\" BORDER=0 ISMAP></A>\n");
683 printf("<FORM NAME=\"bl2\" method=\"post\" "
684 #ifdef NCBI_ENTREZ_CLIENT
685 "action=\"wblast2_cs.cgi?%d\" "
686 #else
687 "action=\"wblast2.cgi?%d\" "
688 #endif
689 "enctype=\"multipart/form-data\">\n", pagecount);
690 #endif
691 if (warning)
692 printf("<h3><font color=#EE0000>WARNING:</font><font color=#0000EE> %s</font></h3>", warning);
693 else {
694 for (vnp=error; vnp; vnp=vnp->next) {
695 error_msg = vnp->data.ptrvalue;
696 printf("<h3><font color=#EE0000>WARNING:</font><font color=#0000EE> %s</font></h3>", error_msg->msg);
697 }
698 }
699 if (!options) {
700 printf("This tool produces the alignment of two given sequences using "
701 #ifndef BL2SEQ_STANDALONE
702 "<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=9254694&dopt=Citation\" TARGET=one>BLAST</A> engine for local alignment. <BR>The stand-alone executable for blasting two sequences (bl2seq) can be retrieved from <A HREF=ftp://ftp.ncbi.nlm.nih.gov/blast/executables> NCBI ftp site</A><br><b><A HREF=http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10339815&dopt=Abstract>Reference:</A></b> Tatiana A. Tatusova, Thomas L. Madden (1999), \"Blast 2 sequences - a new tool for comparing protein and nucleotide sequences\", FEMS Microbiol Lett. 174:247-250 \
703 <br><BR>"
704 #else
705 "BLAST<BR>engine for local alignment.<P>\n"
706 #endif
707 );
708 }
709 #ifndef BL2SEQ_STANDALONE
710 printf("<A HREF=http://www.ncbi.nlm.nih.gov/blast/blast_program.html TARGET=one>Program</A>\n");
711 #else
712 printf("<A HREF=\"docs/blast_program.html\">Program</A>\n");
713 #endif
714 printf("<select name=\"program\" onChange=\"chan_prog(this)\">\n");
715 printf("<option%s> blastn\n", (prog_number == blast_type_blastn) ? " SELECTED" : "");
716 printf("<option%s> blastp\n", (prog_number == blast_type_blastp) ? " SELECTED" : "");
717 printf("<option%s> tblastn\n", (prog_number == blast_type_tblastn) ? " SELECTED" : "");
718 printf("<option%s> blastx\n", (prog_number == blast_type_blastx) ? " SELECTED" : "");
719 printf("<option%s> tblastx\n", (prog_number == blast_type_tblastx) ? " SELECTED" : "");
720 printf("</select>\n");
721 #ifndef BL2SEQ_STANDALONE
722 printf("<A HREF=http://www.ncbi.nlm.nih.gov/blast/options.html#matrix TARGET=one>Matrix</A>\n");
723 #else
724 printf("<A HREF=docs/options.html#matrix>Matrix</A>\n");
725 #endif
726 printf("<select name=\"matrix\" onChange=\"update_mtrx(this)\">\n");
727 if (prog_number != blast_type_blastn) {
728 printf("<option value=0>BLOSUM62\n");
729 printf("<option value=1%s>PAM30\n", (mtrx == 1) ? " SELECTED" : "");
730 printf("<option value=2%s>PAM70\n", (mtrx == 2) ? " SELECTED" : "");
731 printf("<option value=3%s>BLOSUM45\n", (mtrx == 3) ? " SELECTED" : "");
732 printf("<option value=4%s>BLOSUM80\n", (mtrx == 4) ? " SELECTED" : "");
733 } else
734 printf("<option> Not Applicable\n");
735 printf("</select>\n");
736
737 printf("<HR>\n");
738
739 /* -------------------------------------------------- */
740 printf("Parameters used in <A HREF="
741 #ifndef BL2SEQ_STANDALONE
742 "http://www.ncbi.nlm.nih.gov/blast/full_options.html#blastn TARGET=one>"
743 #else
744 "docs/full_options.html#blastn> "
745 #endif
746 "BLASTN </A>program only:<BR>\n");
747 if (options) {
748 printf("<strong>Match:</strong><INPUT type=text size=8 name=\"match\"%s>",
749 (is_prot == FALSE) ? " value=1" : "");
750 printf("<strong>Mismatch:</strong><INPUT type=text size=8 name=\"msmatch\"%s>",
751 (is_prot == FALSE) ? " value=-2" : "");
752 printf("<BR>\n");
753 printf("Open gap\n");
754 printf("<INPUT type=text size=4 name=\"gopen\" value=%d>\n",
755 options->gap_open);
756 printf("and extension gap\n");
757 printf("<INPUT type=text size=4 name=\"gext\" value=%d> penalties<BR>\n", options->gap_extend);
758 printf("gap x_dropoff\n");
759 printf("<INPUT type=text size=4 name=\"dropoff\" value=%d>\n", options->gap_x_dropoff);
760 printf("<A HREF=http://www.ncbi.nlm.nih.gov/blast/newoptions.html#expect>expect</A>\n");
761 printf("<INPUT type=text size=4 name=\"expect\" value=%f>\n", options->expect_value);
762 printf(" word size\n");
763 printf("<INPUT type=text size=4 name=\"word\" value=%d>\n",
764 options->wordsize);
765 } else {
766 printf("<strong>Reward for a match:</strong><INPUT type=text size=8 name=\"match\"%s>",
767 (is_prot == FALSE) ? " value=1" : "");
768 printf("<strong>Penalty for a mismatch:</strong><INPUT type=text size=8 name=\"msmatch\"%s>",
769 (is_prot == FALSE) ? " value=-2" : "");
770 printf("<BR><BR>\n");
771 printf("<INPUT type=checkbox NAME=megablast onClick=\"megablast_update(this)\"> Use <a href="
772 #ifndef BL2SEQ_STANDALONE
773 "http://www.ncbi.nlm.nih.gov/blast/megablast.html"
774 #else
775 "docs/megablast_readme.html"
776 #endif
777 ">Mega BLAST</a> ");
778 printf("Strand option <select name=\"strand\">\n");
779 printf("<option value=3 SELECTED> Both strands\n");
780 printf("<option value=1> Top strand\n");
781 printf("<option value=2> Reverse strand\n");
782 printf("</select> ");
783 #ifdef NCBI_INTERNAL_NEW_FORMATTER
784 printf("View option <select name=\"view\">\n");
785 printf("<option value=1 SELECTED> Standard\n");
786 printf("<option value=2> Mismatch-highlighting\n");
787 printf("</select> \n");
788 printf("<BR>Masking character option <select name=\"mask_char\">\n");
789 printf("<option value=0 SELECTED> X for protein, n for nucleotide\n");
790 printf("<option value=2> Lower case\n");
791 printf("</select> \n");
792
793 printf("Masking color option <select name=\"mask_color\">\n");
794 printf("<option value=1 SELECTED> Black\n");
795 printf("<option value=2> Grey\n");
796 printf("<option value=3> Red\n");
797 printf("</select> \n");
798 printf("<BR><INPUT type=checkbox NAME=cds_translation>Show CDS translation\n");
799 #endif
800 printf("<HR>\n");
801
802 printf("Open gap\n");
803 printf("<INPUT type=text size=4 name=\"gopen\" value=5>\n");
804 printf("and extension gap\n");
805 printf("<INPUT type=text size=4 name=\"gext\" value=2> penalties<BR>\n");
806 printf("gap x_dropoff\n");
807 printf("<INPUT type=text size=4 name=\"dropoff\" value=50>\n");
808 printf("<A HREF=http://www.ncbi.nlm.nih.gov/blast/newoptions.html#expect>expect</A>\n");
809 printf("<INPUT type=text size=4 name=\"expect\" value=10.0>\n");
810 printf(" word size\n");
811 printf("<INPUT type=text size=4 name=\"word\" value=11>\n");
812 }
813 printf("<a href="
814 #ifndef BL2SEQ_STANDALONE
815 "http://www.ncbi.nlm.nih.gov/blast"
816 #else
817 "docs"
818 #endif
819 "/newoptions.html#filter>Filter</a>\n");
820 printf("<INPUT TYPE=checkbox NAME=Filter VALUE=%d", filter);
821 if (filter == 1) {
822 printf(" CHECKED>\n");
823 } else {
824 printf(">\n");
825 }
826 printf("<INPUT TYPE=\"submit\" VALUE=\"Align\">\n");
827 if (options)
828 printf("<INPUT TYPE=\"reset\" VALUE=\"Clear Input\">\n");
829
830 printf("<HR>\n");
831 #ifdef NCBI_ENTREZ_CLIENT
832 printf("<font color=ff0000>Sequence 1</font><BR>\n Enter accession, GI or sequence in FASTA format \n");
833 #else
834 printf("<font color=ff0000>Sequence 1</font> Enter sequence in FASTA format \n");
835 #endif
836 printf("<font color=ff0000>from:<INPUT type=text size=8 name=\"from\" value=%d>to:<INPUT type=text size=8 name=\"to\" value=%d></font><BR>\n", from, to);
837 printf("<textarea name=\"seq\" rows=6 cols=60>");
838 if (seq1) {
839 if (*seq1 == '>') {
840 printf(">");
841 seq1++;
842 }
843 printf("%s</textarea>\n", seq1);
844 } else {
845 printf("</textarea>\n");
846 }
847 printf("<BR>\n");
848 printf("or upload FASTA file <INPUT type=file name=\"seqfile1\">");
849 printf("<HR>\n");
850 #ifdef NCBI_ENTREZ_CLIENT
851 printf("<font color=ff0000>Sequence 2</font><BR>\n Enter accession, GI or sequence in FASTA format \n");
852 #else
853 printf("<font color=ff0000>Sequence 2</font> Enter sequence in FASTA format \n");
854 #endif
855 printf("<font color=ff0000>from:<INPUT type=text size=8 name=\"ffrom\" value=%d>to:<INPUT type=text size=8 name=\"tto\" value=%d></font><BR>\n", ffrom, tto);
856 printf("<textarea name=\"sseq\" rows=6 cols=60>");
857 if (seq2) {
858 if (*seq2 == '>') {
859 printf(">");
860 seq2++;
861 }
862 printf("%s</textarea>\n", seq2);
863 } else {
864 printf("</textarea>\n");
865 }
866 printf("<BR>\n");
867 printf("or upload FASTA file <INPUT type=file name=\"seqfile2\">");
868 printf("<BR>\n");
869
870 printf("<INPUT TYPE=\"submit\" VALUE=\"Align\">\n");
871 printf("<INPUT TYPE=\"reset\" VALUE=\"Clear Input\">\n");
872 printf("<INPUT TYPE=hidden name=\"page\" value=\"%d\">\n", pagecount+1);
873 if (options)
874 printf("<INPUT TYPE=hidden name=\"program\" value=\"%s\">\n",
875 options->program_name);
876 else
877 printf("<INPUT TYPE=hidden name=\"program\" value=\"blastn\">\n");
878
879 printf("</form>\n");
880 printf("<HR>\n");
881 printf("<ADDRESS> Comments and suggestions to");
882 printf(" <A HREF=\"mailto:blast-help@ncbi.nlm.nih.gov\">blast-help@ncbi.nlm.nih.gov</A>\n");
883 printf("<BR>\n");
884 printf("</body>\n");
885 printf("</html>\n");
886 fflush(stdout);
887
888 options = BLASTOptionDelete(options);
889
890 exit (0);
891 }
892
/** Writes the opening part of the results page to stdout.
 *
 * Emits the <HTML>/<head> section (title, keywords, and, unless built with
 * BL2SEQ_STANDALONE, the NCBI stylesheet link), calls JavaScriptFun() inside
 * the head, opens the <BODY> with either the NCBI banner and quick-links bar
 * or the standalone image map, and finally prints the version banner via
 * BlastPrintVersionInfo().
 *
 * @param progname  Program name appended to the version banner; a NULL
 *                  pointer is treated as an empty string.
 *
 * The remaining parameters (query_bsp, subject_bsp, from, to, ffrom, tto,
 * len1, len2) are not used by this function.
 */
static void CreateJavaHeadHTML(BioseqPtr query_bsp, BioseqPtr subject_bsp, Int4 from, Int4 to, Int4 ffrom, Int4 tto, Int4 len1, Int4 len2, CharPtr progname)
{
    static Char tmp[128];

    printf("<HTML>\n");
    printf("<head>\n");
    printf("<title>Blast Result</title>\n\n");
    printf("<META NAME=\"keywords\" CONTENT=\"NCBI, BLAST, ORF, Bioinformatics\">\n");
#ifndef BL2SEQ_STANDALONE
    printf("<link rel=\"stylesheet\" href=\"http://www.ncbi.nlm.nih.gov/ncbi.css\">\n");
#endif
    JavaScriptFun();
    printf("</head>\n");
#ifndef BL2SEQ_STANDALONE
    printf("<BODY bgcolor=\"#f0f0fe\">\n");
    printf("<table border=0 width=600 cellspacing=0 cellpadding=0>\n");
    printf("<tr valign=center> \n");
    printf("<td width=140><A HREF=\"http://www.ncbi.nlm.nih.gov\"><img src=\"http://www.ncbi.nlm.nih.gov/corehtml/left.GIF\" width=130 height=45 border=0 ALT=\"NCBI logo\"></A></td>\n");
    printf("<td width=460 ><h1>Blast 2 Sequences results</h1></td>\n");
    printf("</tr>\n");
    printf("</table>\n");
    printf("<!-- the quicklinks bar--> \n");
    printf("<table border=0 width=600 cellspacing=0 cellpadding=1 bgcolor=#003366>\n");
    printf("<tr align=\"center\">\n");
    printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed\" class=\"BAR\"><FONT COLOR=#FFFFFF>PubMed</FONT></a></td>\n");
    printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/Entrez/\" class=\"BAR\"><FONT COLOR=#FFFFFF>Entrez</FONT></a></td>\n");
    printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/blast/\" class=\"BAR\"><FONT COLOR=#FFFFFF>BLAST</FONT></a></td>\n");
    printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/omim/\" class=\"BAR\"><FONT COLOR=#FFFFFF>OMIM</FONT></a></td>\n");
    printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html\" class=\"BAR\"><FONT COLOR=#FFFFFF>Taxonomy</FONT></a></td>\n");
    printf("<td width=100><a href=\"http://www.ncbi.nlm.nih.gov/Structure/\" class=\"BAR\"><FONT COLOR=#FFFFFF>Structure</FONT></a></td>\n");
    printf("</tr></TABLE><p>\n");
#else
    printf("<BODY BGCOLOR=\"#F0F0FE\" LINK=\"#0000FF\" "
           "VLINK=\"#660099\" ALINK=\"#660099\">\n");

    printf("<A HREF=\"blast_form.map\">"
           "<IMG SRC=\"images/bl2seq.gif\" BORDER=0 ISMAP></A>\n<P>");
#endif
    init_buff();
    /* The fixed prefix is 34 characters long; the %.90s precision caps the
       program name so the formatted text can never overrun tmp[128]. */
    sprintf(tmp, "BLAST 2 sequences results version %.90s",
            (progname != NULL) ? progname : "");
    BlastPrintVersionInfo(tmp, TRUE, stdout);
    free_buff();

    return;
}
938
939
/** Writes the closing markup of a page (a line break followed by the
 *  </BODY> and </HTML> end tags) to stdout.
 */
static void CreateTailHTML()
{
    /* One call with adjacent literals; the bytes written are unchanged. */
    printf("<br>\n"
           "</BODY>\n"
           "</HTML>\n");
}
946
/** Converts a digit value to its character in a 62-symbol alphabet:
 *  values below 10 map onto '0'.., values below 36 onto 'A'.., and
 *  everything else onto 'a'.. (offset by 36). No range check is performed.
 */
static int NumToGun1(int n)
{
    if (n < 10) {
        return n + '0';
    }
    if (n < 36) {
        return (n - 10) + 'A';
    }
    return (n - 36) + 'a';
}
951
NumToGun(int n)952 static CharPtr NumToGun(int n)
953 {
954 static Char str[4];
955
956 str[0] = NumToGun1(n/62);
957 str[1] = NumToGun1(n%62);
958 return str;
959 }
960
961 /** Frees the part of the byte store list that has not been used for replacement
962 * of Bioseq data.
963 */
964 static void
ByteStoreListFreeUnused(ValNodePtr bs_list,Uint1 frame_to_skip)965 ByteStoreListFreeUnused (ValNodePtr bs_list, Uint1 frame_to_skip)
966 {
967 ByteStorePtr bsp;
968 ValNodePtr bs_next = NULL;
969 Boolean skip_done = FALSE;
970
971 for( ; bs_list; bs_list = bs_next) {
972 bs_next = bs_list->next;
973 if (skip_done || bs_list->choice != frame_to_skip) {
974 bsp = (ByteStorePtr) bs_list->data.ptrvalue;
975 if(bsp != NULL)
976 BSFree(bsp);
977 } else {
978 skip_done = TRUE;
979 }
980 MemFree(bs_list);
981 }
982 }
983
984 /*******************************************************/
985
/** Prints the score summary of one alignment inside a <pre> block.
 *
 * Output consists of the bit score / raw score / expect line, the
 * identities (and, for proteins, positives) and gaps line, and a frame or
 * strand line, as computed by find_score_in_align().
 *
 * @param sap       Alignment to summarise.
 * @param is_aa     TRUE for protein alignments; a scoring matrix is then
 *                  needed to count positives.
 * @param matrix    Scoring matrix to use for proteins; when NULL the default
 *                  matrix is loaded and released locally.
 * @param mask_loc  Optional masking locations. When given, the query
 *                  sequence data is temporarily replaced by a masked copy
 *                  while the statistics are computed and restored afterwards.
 */
static void PrintOutScore(SeqAlignPtr sap, Boolean is_aa, Int4Ptr PNTR matrix, ValNodePtr mask_loc)
{
    Int4 number, score;
    Nlm_FloatHi bit_score, evalue;
    CharPtr eval_buff_ptr;
    Char eval_buff[10], bit_score_buff[10];
    AlignSumPtr asp;
    Int2 percent_identical, percent_positive;
    ValNodePtr bs_list;
    BioseqPtr q_bsp;
    Int4 frame = 0;
    Uint1 code=0;
    Uint1 repr=0;
    ByteStorePtr seq_data=NULL;
    Boolean local_matrix = FALSE;
    Boolean seq_data_replaced = FALSE;

    printf("<pre>\n");
    GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);

    /* Evalue buffer for printing may be shifted if a digit is knocked off
       before e. */
    eval_buff_ptr = eval_buff;
    ScoreAndEvalueToBuffers(bit_score, evalue, bit_score_buff,
                            &eval_buff_ptr, TX_KNOCK_OFF_ALLOWED);

    if (number == 1) {
        printf("Score = %s bits (%ld), Expect = %s<BR>",
               bit_score_buff, (long) score, eval_buff_ptr);
    } else {
        printf("Score = %s bits (%ld), Expect(%ld) = %s<BR>",
               bit_score_buff, (long) score, (long) number, eval_buff_ptr);
    }
    asp = MemNew(sizeof(AlignSum));
    if (asp == NULL) {
        /* Out of memory: close the block that was opened and give up. */
        printf("</pre>\n");
        return;
    }
    asp->matrix = NULL;
    if (is_aa) {
        if (matrix != NULL)
            asp->matrix = matrix;
        else {
            asp->matrix = load_default_matrix();
            local_matrix = TRUE;
        }
    }
    asp->is_aa = is_aa;
    asp->totlen = 0;
    asp->positive = 0;
    asp->identical = 0;
    asp->gaps = 0;
    asp->master_sip = SeqIdDup(TxGetQueryIdFromSeqAlign(sap));
    asp->target_sip = TxGetSubjectIdFromSeqAlign(sap);

    if (mask_loc) { /* Mask the query sequence */
        bs_list = CreateMaskByteStore (mask_loc);
        q_bsp = BioseqLockById(asp->master_sip);
        if (q_bsp != NULL) {
            if (ISA_na(q_bsp->mol) && sap->segtype == SAS_STD) {
                /* Translated query: derive the reading frame from the
                   position of the first segment. */
                StdSegPtr ssp = (StdSegPtr) sap->segs;
                frame = SeqLocStart(ssp->loc);
                if (SeqLocStrand(ssp->loc) == Seq_strand_minus) {
                    frame += SeqLocLen(ssp->loc);
                    frame = -(1+(q_bsp->length - frame)%3);
                } else {
                    frame = (1+frame%3);
                }
            } else
                frame = 0;
            /* Remember the unmasked data so it can be put back later. */
            repr = q_bsp->repr;
            seq_data = q_bsp->seq_data;
            code = q_bsp->seq_data_type;
            seq_data_replaced = replace_bytestore_data(q_bsp, bs_list, (Uint1)frame);
            if (!seq_data_replaced) {
                q_bsp->repr = repr;
                q_bsp->seq_data = seq_data;
                q_bsp->seq_data_type = code;
            }
            BioseqUnlock(q_bsp);
        }
        ByteStoreListFreeUnused(bs_list, (Uint1)frame);
    }

    find_score_in_align(sap, 1, asp);

    /* Restore the unmasked sequence data. */
    if (seq_data_replaced) {
        q_bsp = BioseqLockById(asp->master_sip);
        if (q_bsp != NULL) {
            q_bsp->repr = repr;
            q_bsp->seq_data = seq_data;
            q_bsp->seq_data_type = code;
            /* Balance the lock taken just above. */
            BioseqUnlock(q_bsp);
        }
    }

    if (asp->totlen > 0) {
        Char sign1, sign2;

        percent_identical = (100*asp->identical)/ (asp->totlen);
        percent_positive = (100*asp->positive)/ (asp->totlen);
        /* Every %ld argument is cast to long so that the argument type
           matches the conversion on all platforms. */
        if (asp->is_aa)
            printf("Identities = %ld/%ld (%ld%%), Positives = %ld/%ld (%ld%%)",
                   (long) asp->identical, (long) asp->totlen,
                   (long) percent_identical,
                   (long) (asp->positive+asp->identical),
                   (long) asp->totlen,
                   (long) (percent_identical+percent_positive));
        else
            printf("Identities = %ld/%ld (%ld%%)", (long) asp->identical,
                   (long) asp->totlen, (long) percent_identical);
        if (asp->gaps > 0) {
            printf(", Gaps = %ld/%ld (%ld%%)<BR>", (long) asp->gaps,
                   (long) asp->totlen,
                   (long) (100*asp->gaps)/(asp->totlen));
        } else {
            printf("<BR>");
        }

        /* '0' means "no frame information for this sequence". */
        sign1 = sign2 = '0';
        if (asp->m_frame > 0) sign1 = '+';
        else if (asp->m_frame < 0) sign1 = '-';
        if (asp->t_frame > 0) sign2 = '+';
        else if (asp->t_frame < 0) sign2 = '-';
        if (sign1 != '0' && sign2 != '0')
            printf(" Frame = %c%d / %c%d", sign1, (int) ABS(asp->m_frame),
                   sign2, (int) ABS(asp->t_frame));
        else if (sign1 != '0')
            printf(" Frame = %c%d", sign1, (int) ABS(asp->m_frame));
        else if (sign2 != '0')
            printf(" Frame = %c%d", sign2, (int) ABS(asp->t_frame));
        else if (asp->m_strand != Seq_strand_unknown && asp->t_strand != Seq_strand_unknown) {
            if (asp->m_strand != asp->t_strand)
                printf(" Strand = Plus / Minus");
            else
                printf(" Strand = Plus / Plus");
        }
        printf("<BR><BR>");
    }
    printf("</pre>\n");

    SeqIdFree(asp->master_sip);
    if (local_matrix)
        free_default_matrix(asp->matrix);
    MemFree(asp);
}
1120
/** Converts one alignment into two parallel lists of drawable segments.
 *
 * For every piece of the alignment a Prym record is allocated in rect[]
 * (first sequence) and in rectY[] (second sequence), each carrying a colour
 * code and a length in scaled units. Colour codes written here:
 *   -1     leading spacer that offsets one bar against the other,
 *    0     stretch outside the aligned segments (before, between, after),
 *    3     segment whose start on that sequence is -1 (in the Std-seg case,
 *          one whose location is SEQLOC_EMPTY) - presumably a gap,
 *   color  aligned segment.
 *
 * Dense-diag, dense-seg and std-seg alignments are handled; any other
 * segment type produces no entries.
 *
 * @param sap     Alignment to convert; NULL yields 0.
 * @param rect    Output array for the first sequence.
 * @param rectY   Output array for the second sequence.
 * @param scalex  Scale factor applied to first-sequence lengths.
 * @param scaley  Scale factor applied to second-sequence lengths.
 * @param l1, l2  Not used by this function.
 * @param color   Colour code stored for aligned segments.
 * @param from, to    Displayed range of the first sequence.
 * @param ffrom, tto  Displayed range of the second sequence.
 * @return Number of entries written to each array. The caller owns the
 *         allocated records; no bound on the array size is checked here.
 *
 * NOTE(review): the dense-diag and dense-seg branches apply scalex to
 * several rectY lengths where the std-seg branch uses scaley. This is left
 * untouched because it cannot be confirmed from this function alone.
 */
static Int2 CreateRectAlign(SeqAlignPtr sap, PrymPtr PNTR rect, PrymPtr PNTR rectY, FloatHi scalex, FloatHi scaley, Int4 l1, Int4 l2, Int2 color, Int4 from, Int4 ffrom, Int4 to, Int4 tto)
{
    DenseDiagPtr ddp;
    DenseSegPtr dsp;
    Int4 index1;
    /* Initialised so that an alignment without segments reads defined
       values when the trailing entry is computed. */
    Int4 x1 = 0, x2 = 0, y1 = 0, y2 = 0;
    Int2 k;
    Int2 sx = Seq_strand_unknown, sy = Seq_strand_unknown;
    Boolean first = TRUE;

    if (sap == NULL) {
        return 0;
    }
    k = 0;

    if (sap->segtype == SAS_DENDIAG) {
        ddp = sap->segs;
        x2 = y2 = 0;
        first = TRUE;
        while (ddp != NULL) {
            x1 = ddp->starts [0];
            y1 = ddp->starts [1];
            sx = ddp->strands[0];
            sy = ddp->strands[1];
            if (first) {
                if (sx == Seq_strand_plus) {
                    x1 = ddp->starts[0] - from + 1;
                } else {
                    x1 = to - (ddp->starts[0] + ddp->len) + 1;
                }
                if (sy == Seq_strand_plus) {
                    /* The second sequence is offset by its own range start
                       (ffrom), as in the dense-seg and std-seg branches. */
                    y1 = ddp->starts[1] - ffrom + 1;
                } else {
                    y1 = tto - (ddp->starts[1] + ddp->len) + 1;
                }
                rect[k] = MemNew(sizeof(Prym));
                rect[k]->color = -1;
                rect[k]->len = 0;
                rectY[k] = MemNew(sizeof(Prym));
                rectY[k]->color = -1;
                rectY[k]->len = 0;
                if (x1 < y1) {
                    rect[k]->len = (y1-x1)*scalex + 0.5;
                } else {
                    rectY[k]->len = (x1-y1)*scalex + 0.5;
                }
                first = FALSE;
                k++;
            }
            /* Unaligned stretch between the previous segment and this one. */
            rect[k] = MemNew(sizeof(Prym));
            rect[k]->color = 0;
            rect[k]->len = (x1 - x2)*scalex + 0.5;
            rectY[k] = MemNew(sizeof(Prym));
            rectY[k]->color = 0;
            rectY[k]->len = (y1 - y2)*scaley + 0.5;
            k++;
            x2 = ddp->starts [0] + ddp->len;
            y2 = ddp->starts [1] + ddp->len;
            /* The aligned segment itself. */
            rect[k] = MemNew(sizeof(Prym));
            rect[k]->color = color;
            rect[k]->len = (x2 - x1)*scalex + 0.5;
            rectY[k] = MemNew(sizeof(Prym));
            rectY[k]->color = color;
            rectY[k]->len = (y2 - y1)*scaley + 0.5;
            k++;

            ddp = ddp->next;
        }
        /* Trailing unaligned stretch. */
        rect[k] = MemNew(sizeof(Prym));
        rect[k]->color = 0;
        if (sx != Seq_strand_minus) {
            rect[k]->len = (to - x2)*scalex + 0.5;
        } else {
            rect[k]->len = (x1 - from +1)*scalex + 0.5;
        }
        rectY[k] = MemNew(sizeof(Prym));
        rectY[k]->color = 0;
        if (sy != Seq_strand_minus) {
            rectY[k]->len = (tto - y2)*scaley + 0.5;
        } else {
            rectY[k]->len = (y1 - ffrom +1)*scaley + 0.5;
        }
        k++;
    } else if (sap->segtype == SAS_DENSEG) {
        dsp = sap->segs;
        k = 0;
        for (index1=0; index1 < dsp->numseg; index1++) {
            x1 = dsp->starts[2*index1];
            y1 = dsp->starts[2*index1+1];
            sx = dsp->strands[2*index1];
            sy = dsp->strands[2*index1+1];
            x2 = x1 + dsp->lens[index1];
            y2 = y1 + dsp->lens[index1];
            if (index1 == 0) {
                if (sx == Seq_strand_minus) {
                    x1 = to - (dsp->starts[0] + dsp->lens[0]) + 1;
                } else {
                    x1 = dsp->starts[0] - from + 1;
                }
                if (sy == Seq_strand_minus) {
                    y1 = tto - (dsp->starts[1] + dsp->lens[0]) + 1;
                } else {
                    y1 = dsp->starts[1] - ffrom + 1;
                }
                rect[k] = MemNew(sizeof(Prym));
                rect[k]->color = -1;
                rect[k]->len = 0;
                rectY[k] = MemNew(sizeof(Prym));
                rectY[k]->color = -1;
                rectY[k]->len = 0;
                if (x1 < y1) {
                    rect[k]->len = (y1-x1)*scalex + 0.5;
                } else {
                    rectY[k]->len = (x1-y1)*scalex + 0.5;
                }
                k++;
                /* Leading unaligned stretch. */
                rect[k] = MemNew(sizeof(Prym));
                rect[k]->color = 0;
                rect[k]->len = x1*scalex + 0.5;
                rectY[k] = MemNew(sizeof(Prym));
                rectY[k]->color = 0;
                rectY[k]->len = y1*scalex + 0.5;
                k++;
            }
            rect[k] = MemNew(sizeof(Prym));
            if (x1 == -1) {
                rect[k]->color = 3;
            } else {
                rect[k]->color = color;
            }
            rect[k]->len = dsp->lens[index1]*scalex + 0.5;
            rectY[k] = MemNew(sizeof(Prym));
            if (y1 == -1) {
                rectY[k]->color = 3;
            } else {
                rectY[k]->color = color;
            }
            rectY[k]->len = dsp->lens[index1]*scalex + 0.5;
            k++;
        }
        /* Trailing unaligned stretch. */
        rect[k] = MemNew(sizeof(Prym));
        rect[k]->color = 0;
        if (sx == Seq_strand_minus) {
            rect[k]->len = (x1 - from +1)*scalex + 0.5;
        } else {
            rect[k]->len = (to - x2)*scalex + 0.5;
        }
        rectY[k] = MemNew(sizeof(Prym));
        rectY[k]->color = 0;
        if (sy == Seq_strand_minus) {
            rectY[k]->len = (y1 - ffrom +1)*scaley + 0.5;
        } else {
            rectY[k]->len = (tto - y2)*scaley + 0.5;
        }
        k++;
    } else if (sap->segtype == SAS_STD) {
        StdSegPtr ssp = sap->segs;
        SeqLocPtr slp;

        while (ssp) {
            slp = ssp->loc;
            /* A segment needs a location for both sequences. */
            if (!slp || !slp->next) {
                ssp = ssp->next;
                continue;
            }
            if (slp->choice == SEQLOC_EMPTY) {
                x1 = x2 = -1;
                sx = Seq_strand_unknown;
            } else {
                x1 = SeqLocStart(slp);
                sx = SeqLocStrand(slp);
                x2 = SeqLocStop(slp) + 1;
            }
            if (slp->next->choice == SEQLOC_EMPTY) {
                y1 = y2 = -1;
                sy = Seq_strand_unknown;
            } else {
                y1 = SeqLocStart(slp->next);
                sy = SeqLocStrand(slp->next);
                y2 = SeqLocStop(slp->next) + 1;
            }

            if (first) {
                if (sx == Seq_strand_plus) {
                    x1 -= from - 1;
                } else if (sx == Seq_strand_minus) {
                    x1 = to - x2 + 1;
                }
                if (sy == Seq_strand_plus) {
                    y1 -= ffrom - 1;
                } else if (sy == Seq_strand_minus){
                    y1 = tto - y2 + 1;
                }

                rect[k] = MemNew(sizeof(Prym));
                rect[k]->color = -1;
                rect[k]->len = 0;
                rectY[k] = MemNew(sizeof(Prym));
                rectY[k]->color = -1;
                rectY[k]->len = 0;
                if (y1*scaley>x1*scalex)
                    rect[k]->len = y1*scaley - x1*scalex + 0.5;
                else
                    rectY[k]->len = x1*scalex - y1*scaley + 0.5;
                first = FALSE;
                k++;
                /* Leading unaligned stretch. */
                rect[k] = MemNew(sizeof(Prym));
                rect[k]->color = 0;
                rect[k]->len = x1*scalex + 0.5;
                rectY[k] = MemNew(sizeof(Prym));
                rectY[k]->color = 0;
                rectY[k]->len = y1*scaley + 0.5;
                k++;
            }
            /* Where one side has start -1, its length is taken from the
               other sequence's location. */
            rect[k] = MemNew(sizeof(Prym));
            if (x1 == -1) {
                rect[k]->color = 3;
                rect[k]->len = SeqLocLen(slp->next)*scaley + 0.5;
            } else {
                rect[k]->color = color;
                rect[k]->len = SeqLocLen(slp)*scalex + 0.5;
            }

            rectY[k] = MemNew(sizeof(Prym));
            if (y1 == -1) {
                rectY[k]->color = 3;
                rectY[k]->len = SeqLocLen(slp)*scalex + 0.5;
            } else {
                rectY[k]->color = color;
                rectY[k]->len = SeqLocLen(slp->next)*scaley + 0.5;
            }
            k++;
            ssp = ssp->next;
        }
        /* Trailing unaligned stretch. */
        rect[k] = MemNew(sizeof(Prym));
        rect[k]->color = 0;
        if (sx != Seq_strand_minus) {
            rect[k]->len = (to - x2)*scalex + 0.5;
        } else {
            rect[k]->len = (x1 - from +1)*scalex + 0.5;
        }
        rectY[k] = MemNew(sizeof(Prym));
        rectY[k]->color = 0;
        if (sy != Seq_strand_minus) {
            rectY[k]->len = (tto - y2)*scaley + 0.5;
        } else {
            rectY[k]->len = (y1 - ffrom +1)*scaley + 0.5;
        }
        k++;
    }

    return k;
}
1373
/** Renders a list of segments as a row of <img> tags and frees the list
 *  entries.
 *
 * Each entry with a positive length becomes one image whose width is the
 * entry's length: colour -1 uses images/0.gif, colour 0 images/00.gif,
 * colour 3 images/3.gif, and anything else images/<color>.gif at the given
 * height. When the following entry differs (has a different colour, or for
 * colour 0 is anything but a -1 spacer), the image is drawn one pixel
 * narrower and a 1-pixel images/00.gif separator of full height is appended.
 * Entries with non-positive length draw nothing, except that a separator is
 * emitted when both the first and the second entry are empty.
 *
 * @param rect    Array of segment records; every one of the k entries is
 *                freed and its slot set to NULL.
 * @param k       Number of valid entries in rect.
 * @param color   Image number used for aligned segments.
 * @param height  Height of aligned-segment images and of separators.
 * @param index   Anchor number the aligned images link to (HREF=#index),
 *                or -1 for no link.
 */
static void DrawRectAlign(PrymPtr PNTR rect, Int2 k, Int2 color, Int2 height, Int2 index)
{
    Int2 l;

    for (l=0; l < k; l++) {
        /* Only look at the following slot when it is one of the k valid
           entries; beyond that the array contents are not ours to read. */
        PrymPtr next = (l + 1 < k) ? rect[l+1] : NULL;

        if (rect[l]->len <= 0) {
            if (l == 0 && next != NULL && next->len == 0) {
                printf("<img\n height=%d width=1 src='images/00.gif'>", height);
            }
            MemFree(rect[l]);
            rect[l] = NULL;
            continue;
        }
        /* A width of exactly 1 is widened to 2 so that "len-1" below never
           yields a zero-width image. */
        if (rect[l]->len-1 == 0) {
            rect[l]->len++;
        }
        if (rect[l]->color == -1) {
            printf("<img\n height=2 src='images/0.gif'");
            printf(" width=%d>", rect[l]->len);
        } else if (rect[l]->color == 3) {
            if (next != NULL && next->color == rect[l]->color) {
                printf("<img\n height=5 src='images/3.gif'");
                printf(" width=%d>", rect[l]->len);
            } else {
                printf("<img\n height=5 src='images/3.gif'");
                printf(" width=%d>", rect[l]->len-1);
                printf("<img\n height=%d width=1 src='images/00.gif'>", height);
            }
        } else if (rect[l]->color == 0) {
            if (next != NULL && next->color != -1) {
                printf("<img\n height=2 src='images/00.gif'");
                printf(" width=%d>", rect[l]->len-1);
                printf("<img\n height=%d width=1 src='images/00.gif'>", height);
            } else {
                printf("<img\n height=2 src='images/00.gif'");
                printf(" width=%d>", rect[l]->len);
            }
        } else if (rect[l]->len > 0) {
            if (next != NULL && next->color == rect[l]->color) {
                if (index != -1) {
                    printf("<A HREF=#%d><img height=%d src='images/%d.gif' BORDER=0",
                           index, height, color);
                    printf(" width=%d></a>", rect[l]->len);
                } else {
                    printf("<img height=%d src='images/%d.gif'",
                           height, color);
                    printf(" width=%d>", rect[l]->len);
                }
            } else {
                if (index != -1) {
                    printf("<A HREF=#%d><img height=%d src='images/%d.gif' BORDER=0",
                           index, height, color);
                    printf(" width=%d></a>", rect[l]->len-1);
                } else {
                    printf("<img height=%d src='images/%d.gif'",
                           height, color);
                    printf(" width=%d>", rect[l]->len-1);
                }
                printf("<img\n height=%d width=1 src='images/00.gif'>", height);
            }
        }
        MemFree(rect[l]);
        /* Do not leave a dangling pointer behind in the caller's array. */
        rect[l] = NULL;
    }
}
1437
1438 #define LOCAL_BUFLEN 255
1439
1440 static BioseqPtr
1441
FindSeqByAccession(CharPtr accessionOrGi,Int2 id_num,Boolean is_aa)1442 FindSeqByAccession(CharPtr accessionOrGi, Int2 id_num, Boolean is_aa)
1443 {
1444 BioseqPtr bsp = NULL;
1445
1446 #ifdef NCBI_ENTREZ_CLIENT
1447 CharPtr accession;
1448 Int4 version=0, gi, number;
1449 SeqIdPtr sip = NULL;
1450 TextSeqIdPtr tsip;
1451 PDBSeqIdPtr psip;
1452 SeqPortPtr spp;
1453 Int2 retval, buf_length=512;
1454 Uint1 buf[512];
1455 ByteStorePtr old_seq_data = NULL;
1456 Boolean fetch_from_nr = FALSE;
1457 Boolean id1_fetch_again = FALSE;
1458
1459 ID1BioseqFetchEnable ("wblast2", TRUE);
1460
1461 if ((gi = atoi(accessionOrGi)) == 0) {
1462 Char accver[LOCAL_BUFLEN];
1463 CharPtr ptrVersion = NULL;
1464
1465 if((sip = ValNodeNew (NULL)) == NULL)
1466 return NULL;
1467 if((tsip = TextSeqIdNew ()) == NULL)
1468 return NULL;
1469
1470 StringCpy(accver, accessionOrGi);
1471 accession = StringTokMT(accver, ".", &ptrVersion);
1472
1473 if (ptrVersion)
1474 version = atoi(ptrVersion);
1475
1476 tsip->accession = StringSave(accession);
1477 tsip->version = version;
1478 /* GenBank, EMBL, and DDBJ. */
1479 sip->choice = SEQID_GENBANK;
1480 sip->data.ptrvalue = (Pointer) tsip;
1481
1482 gi = ID1FindSeqId (sip);
1483
1484 if (gi == 0) {
1485 /* SwissProt. */
1486 sip->choice = SEQID_SWISSPROT;
1487 gi = ID1FindSeqId (sip);
1488 }
1489 if (gi == 0) {
1490 /* PIR */
1491 sip->choice = SEQID_PIR;
1492 gi = ID1FindSeqId (sip);
1493 }
1494
1495 if (gi == 0) {
1496 /* PRF */
1497 sip->choice = SEQID_PRF;
1498 gi = ID1FindSeqId (sip);
1499 }
1500
1501 if (gi == 0) {
1502 /* OTHER, probably 'ref' */
1503 sip->choice = SEQID_OTHER;
1504 gi = ID1FindSeqId (sip);
1505 }
1506
1507 if(gi == 0) {
1508 /* OK. We failed to find gi using string as TextSeqId. Now trying
1509 last time - with PDBSeqIdPtr */
1510
1511 if((psip = PDBSeqIdNew()) == NULL)
1512 return NULL;
1513
1514 sip->choice = SEQID_PDB;
1515 tsip = TextSeqIdFree(tsip);
1516 sip->data.ptrvalue = psip;
1517
1518 psip->mol = accession;
1519 psip->chain = version;
1520
1521 gi = ID1FindSeqId (sip);
1522 }
1523
1524 sip = SeqIdFree(sip);
1525 }
1526 if (gi > 0) {
1527 ValNodeAddInt(&sip, SEQID_GI, gi);
1528
1529 if(is_aa){
1530 #ifdef NCBI_INTERNAL_NEW_FORMATTER
1531 fetch_from_nr = TRUE;
1532 #endif
1533 }
1534 if (fetch_from_nr) {
1535 ID1BioseqFetchDisable();
1536 ReadDBBioseqFetchEnable ("bl2seq", "nr", !is_aa, TRUE);
1537 }
1538
1539 bsp = BioseqLockById(sip);
1540
1541
1542 if (!bsp && fetch_from_nr) { /*try id1 if fetch from nr failed*/
1543 id1_fetch_again = TRUE;
1544 ReadDBBioseqFetchDisable();
1545 ID1BioseqFetchEnable ("wblast2", TRUE);
1546 bsp = BioseqLockById(sip);
1547 }
1548 SeqIdFree(sip);
1549 }
1550
1551 if (bsp == NULL)
1552 return NULL;
1553
1554
1555 /* We need to keep this bioseq to preserve the protein information for the
1556 CDS translation feature. However we also need to pack the sequence data,
1557 so bsp->seq_data has to be modified.
1558 */
1559 if (ISA_na(bsp->mol)) {
1560 spp = SeqPortNew(bsp, 0, -1, Seq_strand_plus,
1561 Seq_code_iupacna);
1562 bsp->seq_data_type = Seq_code_iupacna;
1563 } else {
1564 spp = SeqPortNew(bsp, 0, -1, Seq_strand_unknown,
1565 Seq_code_ncbieaa);
1566 bsp->seq_data_type = Seq_code_ncbieaa;
1567 }
1568
1569 SeqPortSet_do_virtual(spp, TRUE);
1570 number = 0;
1571
1572 bsp->repr = Seq_repr_raw;
1573 old_seq_data = bsp->seq_data;
1574 bsp->seq_data = BSNew(bsp->length);
1575
1576 while (number < bsp->length) {
1577 retval = SeqPortRead(spp, buf, buf_length);
1578 if (retval < 0)
1579 continue;
1580 if (retval == 0)
1581 break;
1582 BSWrite(bsp->seq_data, buf, retval);
1583 number += retval;
1584 }
1585
1586 SeqPortFree(spp);
1587 old_seq_data = BSFree(old_seq_data);
1588 BioseqPack(bsp);
1589 if (fetch_from_nr && !id1_fetch_again) {
1590 ReadDBBioseqFetchDisable();
1591 } else {
1592 ID1BioseqFetchDisable();
1593 }
1594 #endif
1595
1596 return bsp;
1597 }
1598
/** Validates the BLAST options and, when validation fails, hands control to
 *  the main form page together with the collected error messages.
 *
 * Returns normally only when BLASTOptionValidateEx() reports success (0).
 * On failure Blast2SeqMainPage() is called with the error list and the
 * current form values - presumably it re-displays the form and does not
 * return; confirm against its definition.
 */
static void BLASTOptionValidateHTML(BLAST_OptionsBlkPtr options, CharPtr progname, CharPtr seq1, CharPtr seq2, CharPtr one, CharPtr two, Boolean is_prot, Int2 mtrx, Int2 from, Int2 to, Int2 ffrom, Int2 tto, Int2 filter, Int2 pagecount)
{
    ValNodePtr messages = NULL;
    Int2 rc;

    rc = BLASTOptionValidateEx(options, progname, &messages);
    if (rc == 0) {
        return;
    }

    Blast2SeqMainPage(NULL, seq1, seq2, one, two, messages, is_prot, options, mtrx, from, to, ffrom, tto, filter, pagecount);
}
1610
PrintParam(Boolean is_prot,Int2 mtrx,Int2 ma,Int2 ms,BLAST_OptionsBlkPtr options,CharPtr seq_2,CharPtr seq_1,CharPtr one,CharPtr two,BioseqPtr query_bsp,BioseqPtr subject_bsp,Int4 len1,Int4 len2,Int4 from,Int4 to,Int4 ffrom,Int4 tto,Int2 pagecount,Int4 view,Int4 mask_char,Int4 mask_color,Boolean cds_translation)1611 static void PrintParam(Boolean is_prot, Int2 mtrx, Int2 ma, Int2 ms, BLAST_OptionsBlkPtr options, CharPtr seq_2, CharPtr seq_1, CharPtr one, CharPtr two, BioseqPtr query_bsp, BioseqPtr subject_bsp, Int4 len1, Int4 len2, Int4 from, Int4 to, Int4 ffrom, Int4 tto, Int2 pagecount, Int4 view, Int4 mask_char, Int4 mask_color, Boolean cds_translation)
1612 {
1613 ValNodePtr vnp;
1614 CharPtr s;
1615 static Char buf[41];
1616 Int4 gi;
1617 CharPtr defline = NULL;
1618 printf("<FORM NAME= bl2 method=\"post\" action="
1619 #if defined (BL2SEQ_STANDALONE) && defined (NCBI_ENTREZ_CLIENT)
1620 "\"wblast2_cs.cgi"
1621 #else
1622 "\"wblast2.cgi"
1623 #endif
1624 "?%d\" enctype=\"multipart/form-data\">\n\n", pagecount);
1625
1626 printf("<INPUT TYPE=hidden name=\"page\" value=\"%d\">\n", pagecount+1);
1627 if (is_prot == TRUE) {
1628 #if defined (BL2SEQ_STANDALONE)
1629 printf("<A HREF=docs/options.html#matrix>Matrix</A>\n");
1630 #else
1631 printf("<A HREF=http://www.ncbi.nlm.nih.gov/BLAST/options.html#matrix TARGET=one>Matrix</A>\n");
1632 #endif
1633 printf("<select name=\"matrix\" onChange=\"update_mtrx(this)\">\n");
1634
1635 printf("<option value=0>BLOSUM62\n");
1636 printf("<option value=1%s>PAM30\n", (mtrx == 1) ? " SELECTED" : "");
1637 printf("<option value=2%s>PAM70\n", (mtrx == 2) ? " SELECTED" : "");
1638 printf("<option value=3%s>BLOSUM45\n", (mtrx == 3) ? " SELECTED" : "");
1639 printf("<option value=4%s>BLOSUM80\n", (mtrx == 4) ? " SELECTED" : "");
1640 printf("</select>\n");
1641 } else {
1642 printf("Match:<INPUT TYPE=text name=match size=2 value=%d>\n", ma);
1643 printf("Mismatch:<INPUT TYPE=text name=msmatch size=2 value=%d>\n", ms);
1644 }
1645 printf("gap open:<INPUT TYPE=text name=gopen size=2 value=%d>\n", options->gap_open);
1646 printf("gap extension: <INPUT TYPE=text size=2 name=\"gext\" value=%d> <BR>\n", options->gap_extend);
1647 printf("x_dropoff: <INPUT TYPE=text size=2 name=\"dropoff\" value=\"%d\">\n", options->gap_x_dropoff);
1648 printf("expect:<INPUT TYPE=text size=4 name=\"expect\" value=\"%f\">\n", options->expect_value);
1649 printf("wordsize: <INPUT type=text size=2 name=\"word\" ");
1650 printf("value=%d>\n\n", options->wordsize);
1651 #if defined (BL2SEQ_STANDALONE)
1652 printf("<a href=docs/newoptions.html#filter>Filter</a>");
1653 #else
1654 printf("<a href=http://www.ncbi.nlm.nih.gov/blast/newoptions.html#filter>Filter</a>");
1655 #endif
1656
1657 printf(" <INPUT TYPE=checkbox NAME=Filter VALUE=1");
1658 if (options->filter == 1) {
1659 printf(" CHECKED>\n");
1660 } else {
1661 printf(">\n");
1662 }
1663 #ifdef NCBI_INTERNAL_NEW_FORMATTER
1664 printf("View option <select name=\"view\">\n");
1665 printf("<option value=1 %s> Standard\n", view == 1 ? "SELECTED" : "");
1666 printf("<option value=2 %s> Mismatch-highlighting\n", view == 2 ? "SELECTED" : "");
1667 printf("</select> \n");
1668 printf("<BR>Masking character option <select name=\"mask_char\">\n");
1669 printf("<option value=0 %s> X for protein, n for nucleotide\n", mask_char == 0 ? "SELECTED" : "" );
1670 printf("<option value=2 %s> Lower case\n", mask_char == 2 ? "SELECTED" : "");
1671 printf("</select> \n");
1672
1673 printf("Masking color option <select name=\"mask_color\">\n");
1674 printf("<option value=1 %s> Black\n", mask_color == 1 ? "SELECTED" : "");
1675 printf("<option value=2 %s> Grey\n", mask_color == 2 ? "SELECTED" : "");
1676 printf("<option value=3 %s> Red\n", mask_color == 3 ? "SELECTED" : "");
1677 printf("</select> \n");
1678 printf("<BR><INPUT type=checkbox NAME=cds_translation");
1679 if (cds_translation) {
1680 printf(" CHECKED");
1681 }
1682 printf(">Show CDS translation\n");
1683 #endif
1684 printf("<INPUT TYPE=hidden name=\"program\" value=\"%s\">\n",
1685 options->program_name);
1686
1687 printf("<INPUT TYPE=hidden name=\"matrix\" value=\"%d\">\n", mtrx);
1688
1689 printf("<INPUT TYPE=submit VALUE=Align><HR>\n");
1690 if (seq_1 != NULL && *seq_1 != NULLB) {
1691 printf("<INPUT TYPE=hidden name=\"seq\" value=\"");
1692 for (s = seq_1; *s != '\0'; s++) {
1693 if (*s == '>') {
1694 printf(">");
1695 } else {
1696 printf("%c", *s);
1697 }
1698 }
1699 printf("\">\n");
1700 } else if (one != NULL) {
1701 printf("<INPUT TYPE=hidden name=\"one\" value=\"%s\">\n", one);
1702 }
1703 if (seq_2 != NULL && *seq_2 != NULLB) {
1704 printf("<INPUT TYPE=hidden name=\"sseq\" value=\"");
1705 for (s = seq_2; *s != '\0'; s++) {
1706 if (*s == '>') {
1707 printf(">");
1708 } else {
1709 printf("%c", *s);
1710 }
1711 }
1712 printf("\">\n");
1713 } else if (two != NULL) {
1714 printf("<INPUT TYPE=hidden name=\"two\" value=\"%s\">\n", two);
1715 }
1716 printf("<INPUT TYPE=hidden name=\"from\" value=\"%d\">\n", from);
1717 printf("<INPUT TYPE=hidden name=\"to\" value=\"%d\">\n", to);
1718 printf("<INPUT TYPE=hidden name=\"ffrom\" value=\"%d\">\n", ffrom);
1719 printf("<INPUT TYPE=hidden name=\"tto\" value=\"%d\">\n", tto);
1720
1721 printf("<BR><strong> Sequence 1</strong>: ");
1722 if ((gi = GetGIForSeqId(SeqIdFindBest(query_bsp->id, SEQID_GI))) != 0) {
1723 SeqIdWrite(SeqIdFindBestAccession(query_bsp->id), buf, PRINTID_FASTA_SHORT, 30);
1724 printf("<A HREF=http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=%ld>gi|%ld|%s</A>", gi, gi, buf);
1725 } else {
1726 SeqIdWrite(query_bsp->id, buf, PRINTID_FASTA_LONG, 40);
1727 printf("%s", buf);
1728 }
1729
1730 defline = BioseqGetTitle(query_bsp);
1731 if(defline) {
1732 StringNCpy(defline_buf, defline, DEFLINE_LENGTH);
1733 if(StringLen(defline) > DEFLINE_LENGTH) {
1734 StringCat(defline_buf, "...");
1735 }
1736 printf("%s", defline_buf);
1737 }
1738
1739 printf("<BR>");
1740
1741 printf("Length = %ld\n", len1);
1742 if (from != 0 || to != 0) {
1743 printf("(%ld .. %ld)\n", from, to);
1744 }
1745 printf("<BR><BR>\n");
1746 printf("<strong> Sequence 2</strong>: ");
1747 if ((gi = GetGIForSeqId(SeqIdFindBest(subject_bsp->id, SEQID_GI))) != 0) {
1748 SeqIdWrite(SeqIdFindBestAccession(subject_bsp->id), buf, PRINTID_FASTA_SHORT, 30);
1749 printf("<A HREF=http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=%ld>gi|%ld|%s</A>", gi, gi, buf);
1750 } else {
1751 SeqIdWrite(subject_bsp->id, buf, PRINTID_FASTA_LONG, 40);
1752 printf("%s", buf);
1753 }
1754
1755 defline = BioseqGetTitle(subject_bsp);
1756 if(defline) {
1757 StringNCpy(defline_buf, defline, DEFLINE_LENGTH - 3);
1758 if(StringLen(defline) > DEFLINE_LENGTH) {
1759 StringCat(defline_buf, "...");
1760 }
1761 printf("%s", defline_buf);
1762 }
1763 printf("<BR>\n");
1764
1765 printf("Length = %ld\n", len2);
1766 if (ffrom != 0 || tto != 0) {
1767 printf("(%ld .. %ld)\n", ffrom, tto);
1768 }
1769 printf("<BR><BR><BR>\n");
1770
1771 return;
1772 }
1773
1774 #ifndef BL2SEQ_STANDALONE
1775 /** Thread to take care of log messages when termination by signal */
ConnectionThreadRun(void * p)1776 static void* ConnectionThreadRun(void *p)
1777 {
1778 int nfds;
1779 int rc;
1780 struct pollfd *fds=NULL;
1781
1782 nfds = 1;
1783
1784 fds = (struct pollfd *)
1785 malloc( nfds*sizeof(struct pollfd) );
1786
1787
1788 fds[0].fd = fileno(stdout);
1789 fds[0].events = POLLOUT;
1790 fds[0].revents = 0;
1791
1792 poll(NULL, 0, 2000);
1793
1794 while (run_status != WBLAST2_DONE) {
1795 if (sigflag) {
1796 int signo = GetSignal();
1797
1798 if (signo == SIGXCPU) {
1799 if (run_status == WBLAST2_FORMAT) {
1800 /* Ignore CPU limit if formatting has already started */
1801 signo = 0;
1802 } else {
1803 printf("<HTML>\n<HEAD>\n<TITLE>\nTime Expired\n</TITLE>\n</HEAD>\n");
1804 printf("<BODY>\n<H1>\nTime expired\n</H1>\n<HR>\n</BODY>\n</HTML>\n");
1805 fflush(NULL);
1806 }
1807 }
1808 if (signo) {
1809 logmsg(signo);
1810 poll(NULL, 0, 1000);
1811 exit(signo);
1812 }
1813 }
1814
1815 /* If no signal has been received yet, test the stdout socket */
1816 rc = poll(fds, nfds, 1000);
1817
1818 if (run_status == WBLAST2_DONE)
1819 break;
1820
1821 if (rc < 0 ||
1822 (fds[0].revents & (POLLHUP | POLLNVAL | POLLERR))) {
1823 logmsg(fds[0].revents);
1824 close(fds[0].fd);
1825 abort();
1826 } else if (run_status == WBLAST2_SEARCH) {
1827 char buffer[4096];
1828 memset(buffer, ' ', 4095);
1829 buffer[4095] = NULLB;
1830 /*strcpy(buffer, " ");*/
1831
1832 fprintf(stdout, "<!--%s-->\n", buffer);
1833 fflush(stdout);
1834 }
1835 if (run_status != WBLAST2_DONE) /* Sleep 4 seconds */
1836 poll(NULL, 0, 4000);
1837 }
1838
1839 MemFree(fds);
1840
1841 return NULL;
1842 }
1843 #endif
1844
1845 #ifndef USE_OLD_BLAST
/** Transfer the settings of an options block into a summary-options
 *  structure.
 *
 *  Nothing is done when either structure pointer is NULL.  Program
 *  names are compared case-sensitively; an unrecognised name selects
 *  eChoose.  The matrix name and the filter string are duplicated with
 *  strdup(); a missing filter string is replaced by "F".
 *
 *  @param options    Source options (read only).
 *  @param progname   Program name, e.g. "blastn"; must not be NULL.
 *  @param s_options  Destination structure, updated in place.
 */
static void BLASTOptions2SummaryOptions(BLAST_OptionsBlk* options,
                                        Char* progname,
                                        BLAST_SummaryOptions* s_options)
{
    if (options == NULL || s_options == NULL) {
        return;
    }

    /* Program selection */
    if (strcmp(progname, "blastn") == 0) {
        s_options->program = eBlastn;
    } else if (strcmp(progname, "blastp") == 0) {
        s_options->program = eBlastp;
    } else if (strcmp(progname, "blastx") == 0) {
        s_options->program = eBlastx;
    } else if (strcmp(progname, "tblastn") == 0) {
        s_options->program = eTblastn;
    } else if (strcmp(progname, "tblastx") == 0) {
        s_options->program = eTblastx;
    } else {
        s_options->program = eChoose;
    }

    s_options->hint = eNone;
    if (options->is_megablast_search) {
        s_options->use_megablast = TRUE;
    }
    if (!options->multiple_hits_only) {
        s_options->init_seed_method = eOneHit;
    }

    /* Owned string copies */
    if (options->matrix) {
        s_options->matrix = strdup(options->matrix);
    }
    /* If filtering option not set, assume no filtering! */
    s_options->filter_string =
        strdup(options->filter_string ? options->filter_string : "F");

    /* Plain scalar settings */
    s_options->strand              = options->strand_option;
    s_options->cutoff_evalue       = options->expect_value;
    s_options->word_size           = options->wordsize;
    s_options->word_threshold      = options->threshold_second;
    s_options->gapped_calculation  = options->gapped_calculation;
    s_options->nucleotide_match    = options->reward;
    s_options->nucleotide_mismatch = options->penalty;
    s_options->gap_open            = options->gap_open;
    s_options->gap_extend          = options->gap_extend;
    s_options->gap_x_dropoff       = options->gap_x_dropoff;
    s_options->db_length           = options->db_length;
}
1895 #endif
1896
/** Kinds of text a user may enter for one of the two sequences. */
typedef enum {
    UNDEFINED_SEQ_TYPE,   /* not classified (initial value) */
    FASTA_WITH_DEFLINE,   /* text starts with '>' */
    BARE_FASTA,           /* sequence data without a definition line */
    NCBI_SEQID,           /* first line contains '|' */
    ACCESSION_GI          /* first line is one short word containing a digit */
} BlastQuerySequenceType;


#define MAX_ALLOWED_ID_LEN 20
#define IS_NEWLINE(a) ((a)=='\0' || (a)=='\n' || (a)=='\r')

/* 1 when identifiers may be entered instead of sequence data, i.e. when
   the Entrez client is compiled in; 0 otherwise. */
#ifdef NCBI_ENTREZ_CLIENT
#define SEQ_IDS_CAN_BE_ENTERED 1
#else
#define SEQ_IDS_CAN_BE_ENTERED 0
#endif

/* Returns non-zero for the characters that separate words on a line. */
static int is_word_separator(char ch)
{
    return ch == ' ' || ch == '\t';
}

/* Classifies input that does not start with '>' by looking only at its
   first line (up to the first '\n', '\r' or end of string).  The text is
   inspected in place: nothing is allocated or modified. */
static int classify_first_line(const char *querystr)
{
    size_t length = 0;
    size_t pos, word_begin, word_end;

    while (!IS_NEWLINE(querystr[length]))
        length++;

    /* If first line contains a '|', then it is presumably an NCBI sequence
       identifier */
    if (memchr(querystr, '|', length) != NULL)
        return NCBI_SEQID;

    /* Accessions or gis cannot be more than 20 characters */
    if (length > MAX_ALLOWED_ID_LEN)
        return BARE_FASTA;

    /* Locate the first word.  A blank or empty first line cannot be an
       identifier, so it is reported as sequence data. */
    pos = 0;
    while (pos < length && is_word_separator(querystr[pos]))
        pos++;
    if (pos == length)
        return BARE_FASTA;
    word_begin = pos;
    while (pos < length && !is_word_separator(querystr[pos]))
        pos++;
    word_end = pos;

    /* If first line contains a white space between non-white space, then it
       is a bare sequence, possibly from GenBank flat file */
    while (pos < length && is_word_separator(querystr[pos]))
        pos++;
    if (pos < length)
        return BARE_FASTA;

    /* A single word is an accession or gi only if it contains a digit */
    for (pos = word_begin; pos < word_end; pos++) {
        if (querystr[pos] >= '0' && querystr[pos] <= '9')
            return ACCESSION_GI;
    }
    return BARE_FASTA;
}

/** Determine what kind of sequence input a string holds.
 *
 *  @param querystr  NUL-terminated user input; it is not modified.
 *  @return  One of the BlastQuerySequenceType values:
 *           UNDEFINED_SEQ_TYPE for a NULL pointer, FASTA_WITH_DEFLINE
 *           when the text starts with '>', otherwise BARE_FASTA when
 *           NCBI_ENTREZ_CLIENT is not defined, otherwise the result of
 *           inspecting the first line (NCBI_SEQID, ACCESSION_GI or
 *           BARE_FASTA).
 */
static int get_sequence_type(char *querystr)
{
    if (querystr == NULL)
        return UNDEFINED_SEQ_TYPE;

    /* If query starts with '>', then it is a FASTA with defline */
    if (*querystr == '>')
        return FASTA_WITH_DEFLINE;

    /* If Entrez client is unavailable, accessions or gis cannot be entered,
       so always treat sequence input as FASTA. */
    if (!SEQ_IDS_CAN_BE_ENTERED)
        return BARE_FASTA;

    return classify_first_line(querystr);
}
1963
1964
1965 //This function
1966 //1. Reads sequence entry field ("SEQ" for seqNbr=1 or SSEQ for seqNbr=2)
1967 //2. Reads file entry field (SEQFILE1 for seqNbr=1 or SEQFILE2 for seqNbr=2)
1968 //3. If coming from blast reads accesion/gi entry field ("ONE" for seqNbr=1 or TWO for seqNbr=2)
1969 //4. Determines sequence type
1970 //5. If raw sequence without subject line inserts subject line
1971 //6. Returns pointer to the entered sequence(raw or accession)
GetAndFormatEnteredSequence(WWWBlastInfoPtr theInfo,int seqNbr,int * seq_type)1972 static char * GetAndFormatEnteredSequence(WWWBlastInfoPtr theInfo,int seqNbr,int *seq_type)
1973 {
1974 CharPtr seq=NULL, c, chptr = NULL, sbuf;
1975 static Char seqEntryName[5],seqFileEntryName[9],blastReqEntryName[4];
1976
1977
1978 if(seqNbr == 1) {
1979 sprintf(seqEntryName,"SEQ"); //entry box name will be "SEQ"
1980 //need to keep this if request comes directly from blast
1981 sprintf(blastReqEntryName,"ONE");
1982 }
1983 else {
1984 sprintf(seqEntryName,"SSEQ"); //entry box name will be "SSEQ"
1985 //need to keep this if request comes directly from blast
1986 sprintf(blastReqEntryName,"TWO");
1987 }
1988 sprintf(seqFileEntryName,"SEQFILE%d",seqNbr); //entry box name will be "SEQFILE1" or "SEQFILE2"
1989
1990
1991 #ifdef NCBI_ENTREZ_CLIENT
1992 //need to keep those if request comes directly from blast
1993 chptr = WWWGetValueByName(theInfo->info, blastReqEntryName);
1994 #endif
1995 //If ONE or TWO chptr will contain accession or GI
1996
1997 if (!chptr) {
1998 //Now check sequence entry boxes
1999 if((chptr = WWWGetValueByName(theInfo->info, seqEntryName)) == NULL ||
2000 *chptr == NULLB)
2001 chptr = WWWGetValueByName(theInfo->info, seqFileEntryName);
2002 }
2003
2004 if (chptr) {
2005 while (IS_WHITESP(*chptr))
2006 chptr++;
2007 }
2008 if (chptr && *chptr != NULLB)
2009 c = StringSave(chptr);
2010 else
2011 c = NULL;
2012
2013 if(c) {
2014 *seq_type = get_sequence_type(c);
2015 //Treat NCBI_SEQID seq type the same way as ACCESSION_GI
2016 *seq_type = (*seq_type == NCBI_SEQID) ? ACCESSION_GI : *seq_type;
2017 //Check here if sequence is enetered with subjetc line !!!
2018 if(*seq_type != ACCESSION_GI && *seq_type != FASTA_WITH_DEFLINE) {
2019 // if (c1 && *c1 != '>') {
2020 sbuf = MemNew(StringLen(c)+8);
2021 sprintf(sbuf, ">seq_%d\n%s", seqNbr, c);
2022 seq = StringSave(sbuf);
2023 MemFree(c);
2024 c = seq;
2025 MemFree(sbuf);
2026 }
2027 if(*seq_type == ACCESSION_GI) {
2028 StringTok(c, "\n\r\t");
2029 }
2030 }
2031 return c;
2032 }
2033
2034
2035 //This function readjusts subject lines for two sequences if they are
2036 //originally the same
AdjustSubjectLine(CharPtr * pSeq1EntryData,CharPtr * pSeq2EntryData)2037 static void AdjustSubjectLine(CharPtr *pSeq1EntryData,CharPtr *pSeq2EntryData)
2038 {
2039 CharPtr seq1,seq2,sbuf;
2040
2041 seq1 = *pSeq1EntryData;
2042 seq2 = *pSeq2EntryData;
2043
2044 if (**pSeq1EntryData == '>')
2045 seq1 = seq1 + 1;
2046 sbuf = Malloc(StringLen(seq1)+4);
2047 sprintf(sbuf, ">1_%s", seq1);
2048 seq1 = StringSave(sbuf);
2049 sbuf = MemFree(sbuf);
2050 MemFree(*pSeq1EntryData);
2051 *pSeq1EntryData = seq1;
2052 if (**pSeq2EntryData == '>')
2053 seq2 = seq2 + 1;
2054 sbuf = Malloc(StringLen(seq2)+4);
2055 sprintf(sbuf, ">2_%s",seq2);
2056 seq2 = StringSave(sbuf);
2057 MemFree(sbuf);
2058 MemFree(*pSeq2EntryData);
2059 *pSeq2EntryData = seq2;
2060 }
2061
2062
2063
2064
2065 #define BL2SEQ_CPU_LIMIT 240
2066
Main(void)2067 Int2 Main(void)
2068 {
2069 SeqEntryPtr query_sep = NULL, subject_sep = NULL;
2070 BioseqPtr fake_bsp, query_bsp = NULL, subject_bsp = NULL;
2071 SeqIntPtr sip1, sip2;
2072 Int2 index = 0;
2073 Int4 gopen, gext, dropoff, ma=0, ms=0;
2074 Int4 from=0, to=0, ffrom=0, tto=0;
2075 Int2 wordsize, filter=0;
2076 Int2 mtrx = 0, color=1;
2077 SeqAlignPtr seqalign = NULL, sap, sapnext;
2078 SeqAnnotPtr hsat= NULL, sat, satnext;
2079 SBlastSeqalignArray* seqalign_arr=NULL;
2080 FloatHi expect;
2081 Boolean is_prot=FALSE, is_aa1=FALSE, is_aa2=FALSE, is_na1=TRUE, is_na2=TRUE;
2082 CharPtr seq_1=NULL, seq_2=NULL, chptr;
2083 CharPtr sq_1=NULL, sq_2=NULL, progname;
2084
2085 CharPtr accessionOrGi_1=NULL,accessionOrGi_2=NULL;
2086 CharPtr seq1EntryData, seq2EntryData;
2087
2088 static Char mbuf[12];
2089 BLAST_OptionsBlkPtr options = NULL;
2090 Int4 ll, len1, len2, txoption;
2091 SeqIdPtr sip;
2092 Int4Ptr PNTR txmatrix = NULL;
2093 BLAST_MatrixPtr blast_matrix = NULL;
2094 SeqPortPtr spp;
2095 Uint1 code1, code2;
2096 ValNodePtr error_return=NULL;
2097 FloatHi scalex, scaley;
2098 DenseDiagPtr ddp;
2099 DenseSegPtr dsp;
2100 StdSegPtr ssp;
2101 Int4 index1, i2;
2102 Int4 x, y, xx, yy;
2103 Int2 k, x_factor, y_factor;
2104 static PrymPtr PNTR rect;
2105 static PrymPtr PNTR rectY;
2106 SeqLocPtr slp1=NULL, slp2=NULL, slp, sl, qslp = NULL;
2107 Uint1 align_type;
2108 ValNodePtr other_returns, mask = NULL, mask_head = NULL;
2109 CharPtr buffer = NULL;
2110 TxDfDbInfoPtr dbinfo = NULL;
2111 BlastTimeKeeper time_keeper;
2112 int seq_1_type = UNDEFINED_SEQ_TYPE, seq_2_type = UNDEFINED_SEQ_TYPE;
2113
2114 WWWBlastInfoPtr theInfo;
2115 ReadDBFILEPtr rdfp;
2116 Int4 dbseq_num;
2117 CharPtr dbname;
2118 CharPtr error_msg;
2119 Int2 pagecount = 0;
2120 Boolean is_megablast = FALSE;
2121 CharPtr rid = NULL, database = NULL;
2122 sigset_t sigset;
2123 struct sigaction sa;
2124 Int4 mask_color =1, mask_char = 0, view = 1;
2125 Boolean cds_translation = FALSE;
2126 CharPtr value_holder = NULL;
2127 #ifndef BL2SEQ_STANDALONE
2128 TNlmThread connection_thread = NULL;
2129 void *thrstat;
2130 CharPtr program_log, user_IP;
2131 int pid, time_start, results_size = 0;
2132 #endif
2133
2134 #ifndef USE_OLD_BLAST
2135 BLAST_SummaryOptions* s_options = NULL;
2136 Blast_SummaryReturn* s_return = NULL;
2137 Boolean mask_at_hash = FALSE;
2138 EBlastProgramType program_number;
2139 #endif
2140
2141 #ifdef RLIMIT_CPU
2142 struct rlimit rl;
2143
2144 #ifdef SIGXCPU
2145 sa.sa_flags = SA_RESTART;
2146 sa.sa_handler = sighandler;
2147 sigaction(SIGXCPU, &sa, NULL);
2148 #endif
2149 getrlimit(RLIMIT_CPU, &rl);
2150 rl.rlim_cur = BL2SEQ_CPU_LIMIT;
2151 rl.rlim_max = BL2SEQ_CPU_LIMIT + 120;
2152 setrlimit(RLIMIT_CPU, &rl);
2153 #endif
2154
2155 /* mask all signals when in signal handler */
2156 sigfillset(&sigset);
2157 sa.sa_mask = sigset;
2158
2159 /* Handle SIGPIPE */
2160 sa.sa_flags = SA_RESTART;
2161 sa.sa_handler = sighandler;
2162 sigaction(SIGPIPE, &sa, NULL);
2163
2164 if (!SeqEntryLoad())
2165 return 1;
2166
2167 UseLocalAsnloadDataAndErrMsg ();
2168
2169 ErrSetMessageLevel(SEV_WARNING);
2170
2171 PrepareSigmask();
2172
2173 /* This function will read posting data, set-up config file and
2174 write small message into logfile (if it exists) */
2175
2176 theInfo = MemNew(sizeof(WWWBlastInfo));
2177
2178 if(WWWGetArgs(&theInfo->info) != WWWErrOk) {
2179 WWWInfoFree(theInfo->info);
2180 MemFree(theInfo);
2181 Blast2SeqMainPage(NULL, NULL, NULL, NULL, NULL, NULL, FALSE,
2182 NULL, 0, 0, 0, 0, 0, 1, 0);
2183 return 1;
2184 }
2185 if((chptr = WWWGetQuery(theInfo->info)) == NULL || *chptr == NULLB) {
2186 WWWInfoFree(theInfo->info);
2187 MemFree(theInfo);
2188 Blast2SeqMainPage(NULL, NULL, NULL, NULL, NULL, NULL, FALSE,
2189 NULL, 0, 0, 0, 0, 0, 1, 0);
2190 }
2191
2192 if(getenv("DEBUG_COMMAND_LINE") != NULL) {
2193 FILE *fd;
2194 fd = FileOpen("__web.in", "w");
2195 fprintf(fd, "%s", ((WWWInfoDataPtr)theInfo->info)->query);
2196 FileClose(fd);
2197 }
2198
2199 value_holder = WWWGetValueByName(theInfo->info, "mask_color");
2200 if(value_holder) {
2201 mask_color = atoi (value_holder);
2202 }
2203 value_holder = WWWGetValueByName(theInfo->info, "mask_char");
2204 if(value_holder) {
2205 mask_char = atoi (value_holder);
2206 }
2207 value_holder = WWWGetValueByName(theInfo->info, "view");
2208 if(value_holder) {
2209 view = atoi (value_holder);
2210 }
2211
2212 if (WWWGetValueByName(theInfo->info, "cds_translation") != NULL) {
2213 cds_translation = TRUE;
2214 }
2215
2216 if((chptr = WWWGetValueByName(theInfo->info, "PROGRAM")) != NULL)
2217 theInfo->program = StringSave(chptr);
2218 else if ((chptr = WWWGetValueByName(theInfo->info, "PROT")) != NULL)
2219 theInfo->program = StringSave(chptr);
2220 else
2221 theInfo->program = StringSave("blastn");
2222
2223 theInfo->align_type =
2224 BlastGetTypes(theInfo->program, &theInfo->query_is_na,
2225 &theInfo->db_is_na);
2226
2227 is_aa1 = !theInfo->query_is_na;
2228 is_aa2 = !theInfo->db_is_na;
2229 is_prot = (is_aa1 || is_aa2);
2230
2231 progname = theInfo->program;
2232
2233 /* For tblastx, we still do a protein search, even though both sequences
2234 are nucleotide. */
2235 if (!(StringICmp(progname, "tblastx")))
2236 is_prot = TRUE;
2237
2238
2239 if ((chptr = WWWGetValueByName(theInfo->info, "MEGABLAST")) != NULL)
2240 is_megablast = TRUE;
2241 options = BLASTOptionNewEx(progname, TRUE, is_megablast);
2242
2243 if ((chptr = WWWGetValueByName(theInfo->info, "PAGE")) != NULL)
2244 pagecount = atoi(chptr);
2245
2246 if ((chptr = WWWGetValueByName(theInfo->info, "GOPEN")) != NULL &&
2247 StringStr(chptr, "default") == NULL)
2248 gopen = atoi(chptr);
2249 else
2250 gopen = -1;
2251
2252 if ((chptr = WWWGetValueByName(theInfo->info, "GEXT")) != NULL &&
2253 StringStr(chptr, "default") == NULL)
2254 gext = atoi(chptr);
2255 else
2256 gext = -1;
2257
2258 if((chptr = WWWGetValueByName(theInfo->info, "DROPOFF")) != NULL &&
2259 StringStr(chptr, "default") == NULL)
2260 dropoff = atoi(chptr);
2261 else
2262 dropoff = 50;
2263
2264 if((chptr = WWWGetValueByName(theInfo->info, "EXPECT")) != NULL &&
2265 StringStr(chptr, "default") == NULL)
2266 expect = atof(chptr);
2267 else
2268 expect = 10;
2269
2270 if((chptr = WWWGetValueByName(theInfo->info, "MATCH")) != NULL &&
2271 StringStr(chptr, "default") == NULL)
2272 ma = atoi(chptr);
2273 else if (!StrCmp(progname, "blastn"))
2274 ma = 1;
2275 if((chptr = WWWGetValueByName(theInfo->info, "MSMATCH")) != NULL &&
2276 StringStr(chptr, "default") == NULL)
2277 ms = atoi(chptr);
2278 else if (!StrCmp(progname, "blastn"))
2279 ms = -2;
2280
2281 if((chptr = WWWGetValueByName(theInfo->info, "FROM")) != NULL &&
2282 StringStr(chptr, "default") == NULL)
2283 from = atoi(chptr);
2284 else
2285 from = 0;
2286 if((chptr = WWWGetValueByName(theInfo->info, "FFROM")) != NULL &&
2287 StringStr(chptr, "default") == NULL)
2288 ffrom = atoi(chptr);
2289 else
2290 ffrom = 0;
2291 if((chptr = WWWGetValueByName(theInfo->info, "TO")) != NULL &&
2292 StringStr(chptr, "default") == NULL)
2293 to = atoi(chptr);
2294 else
2295 to = 0;
2296 if((chptr = WWWGetValueByName(theInfo->info, "TTO")) != NULL &&
2297 StringStr(chptr, "default") == NULL)
2298 tto = atoi(chptr);
2299 else
2300 tto = 0;
2301
2302 if((chptr = WWWGetValueByName(theInfo->info, "STRAND")) != NULL &&
2303 StringStr(chptr, "default") == NULL)
2304 options->strand_option = atoi(chptr);
2305 else
2306 options->strand_option = Seq_strand_both;
2307
2308 #ifdef NCBI_ENTREZ_CLIENT
2309 //Moved this into function GetAndFormatEnteredSequence()
2310 //need to keep those if request comes directly from blast
2311 //one = WWWGetValueByName(theInfo->info, "ONE");
2312 //two = WWWGetValueByName(theInfo->info, "TWO");
2313 #endif
2314 options->cpu_limit = BL2SEQ_CPU_LIMIT;
2315
2316 if((chptr = WWWGetValueByName(theInfo->info, "WORD")) != NULL &&
2317 StringStr(chptr, "default") == NULL)
2318 wordsize = atoi(chptr);
2319 else if (!is_megablast)
2320 wordsize = (is_prot == TRUE) ? 3 : 11;
2321 else {
2322 wordsize = 28;
2323 }
2324
2325 if (WWWGetValueByName(theInfo->info, "FILTER") != NULL)
2326 filter =1;
2327
2328 run_status = WBLAST2_SEARCH;
2329
2330 #ifndef BL2SEQ_STANDALONE
2331 /** fire thread testing connection; not done in the standalone
2332 WWW server package */
2333 connection_thread = NlmThreadCreateEx(ConnectionThreadRun, NULL,
2334 THREAD_RUN|THREAD_BOUND, eTP_Default, NULL, NULL);
2335 #endif
2336
2337 //At this point seq_1,seq_2,accessionOrGi_1,accessionOrGi_2 are NULLS
2338 seq1EntryData = GetAndFormatEnteredSequence(theInfo,1,&seq_1_type);
2339 seq2EntryData = GetAndFormatEnteredSequence(theInfo,2,&seq_2_type);
2340
2341 //seq_type will contain one of the following after GetEnteredSequence() call
2342 //UNDEFINED_SEQ_TYPE,FASTA_WITH_DEFLINE, BARE_FASTA,NCBI_SEQID,ACCESSION_GI
2343
2344 //seq1or2EntryData - will conatain sequence entered in any format in SEQ or SSEQ
2345 //if seq_type != ACCESSION_GI, seqEntryData at this point will contain
2346 //raw sequence with subject line
2347 //otherwise it will contain - accesion/gi
2348
2349 if(seq1EntryData && seq2EntryData &&
2350 seq_1_type != ACCESSION_GI && seq_2_type != ACCESSION_GI &&
2351 StrNCmp(seq1EntryData, seq2EntryData, 2) == 0 ) {
2352 AdjustSubjectLine(&seq1EntryData,&seq2EntryData);
2353 }
2354
2355 if(seq_1_type == ACCESSION_GI) {
2356 accessionOrGi_1 = seq1EntryData;
2357 }
2358 else {
2359 seq_1 = seq1EntryData;
2360 }
2361
2362 if(seq_2_type == ACCESSION_GI) {
2363 accessionOrGi_2 = seq2EntryData;
2364 }
2365 else {
2366 seq_2 = seq2EntryData;
2367 }
2368
2369
2370 //seq1EntryData - data enetered in SEQ, could be raw sequence or GI/Accession
2371 //seq2EntryData - data enetered in SSEQ, could be raw sequence or GI/Accession
2372 //seq_1, seq_2 - raw sequence with subject line
2373 //accessionOrGi_1, accessinOrGi_2 - accesion number or GI
2374
2375 //After that changed all one,two to accessionOrGi_1 or accessionOrGi_2
2376
2377
2378 if (seq_1 != NULL) {
2379 query_sep = FastaToSeqBuff(seq_1, &sq_1, !is_aa1);
2380 query_bsp = (BioseqPtr) query_sep->data.ptrvalue;
2381 is_na1 = FastaCheckDna(seq_1);
2382 } else if (accessionOrGi_1 != NULL && *accessionOrGi_1 != NULLB) {
2383 if ((rid = WWWGetValueByName(theInfo->info, "RID")) != NULL) {
2384 #ifndef BL2SEQ_STANDALONE
2385 /* Get the query sequence from the QBlast results */
2386 sip = SeqIdParse(accessionOrGi_1);
2387
2388 QBlastGetResultsEx(rid, NULL, &query_bsp, NULL, &database, NULL,
2389 NULL, sip);
2390 #endif
2391 if (!query_bsp) {
2392 error_msg = "The query sequence is not found";
2393
2394 Blast2SeqMainPage(error_msg, seq1EntryData, seq2EntryData,
2395 NULL, NULL, NULL, is_prot, options, mtrx,
2396 from, to, ffrom, tto, filter, pagecount);
2397 }
2398 } else {
2399 if ((query_bsp = FindSeqByAccession(accessionOrGi_1, 1, is_aa1)) == NULL) {
2400 error_msg = "The first sequence accession is not found";
2401
2402 Blast2SeqMainPage(error_msg, seq1EntryData, seq2EntryData,
2403 NULL, NULL, NULL, is_prot, options, mtrx,
2404 from, to, ffrom, tto, filter, pagecount);
2405 }
2406 }
2407 if (query_bsp)
2408 is_na1 = ISA_na(query_bsp->mol);
2409 }
2410 if (seq_2 != NULL) {
2411 subject_sep = FastaToSeqBuff(seq_2, &sq_2, !is_aa2);
2412 subject_bsp = (BioseqPtr) subject_sep->data.ptrvalue;
2413 is_na2 = FastaCheckDna(seq_2);
2414 } else if (accessionOrGi_2 != NULL && *accessionOrGi_2 != NULLB) {
2415 if (rid && database) {
2416 /* Fetch the subject seqience from the BLAST database by the
2417 SeqId provided in the link */
2418 sip = SeqIdParse(accessionOrGi_2);
2419 ReadDBBioseqFetchEnable("wblast2", database, TRUE, TRUE);
2420 subject_bsp = BioseqLockById(sip);
2421 ReadDBBioseqFetchDisable();
2422 if (!subject_bsp) {
2423 error_msg = "The database sequence is not found";
2424
2425 Blast2SeqMainPage(error_msg, seq1EntryData, seq2EntryData,
2426 NULL, NULL, NULL, is_prot, options, mtrx,
2427 from, to, ffrom, tto, filter, pagecount);
2428 }
2429 } else {
2430 subject_bsp = FindSeqByAccession(accessionOrGi_2, 2, is_aa2);
2431 }
2432 if (subject_bsp == NULL) {
2433 error_msg = "The second sequence accession is not found";
2434
2435 Blast2SeqMainPage(error_msg, seq1EntryData, seq2EntryData,
2436 NULL, NULL, NULL, is_prot, options, mtrx,
2437 from, to, ffrom, tto, filter, pagecount);
2438 }
2439 is_na2 = ISA_na(subject_bsp->mol);
2440 }
2441 code1 = ((is_aa1 == FALSE) ? Seq_code_iupacna:Seq_code_iupacaa);
2442 code2 = ((is_aa2 == FALSE) ? Seq_code_iupacna:Seq_code_iupacaa);
2443
2444 if (is_prot) {
2445 if((chptr = WWWGetValueByName(theInfo->info, "MATRIX")) == NULL ||
2446 *chptr == NULLB)
2447 chptr = WWWGetValueByName(theInfo->info, "MTRX");
2448 if (chptr && *chptr != NULLB)
2449 mtrx = atoi(chptr);
2450 else
2451 mtrx = 0;
2452
2453 switch(mtrx) {
2454 case 1:
2455 sprintf(mbuf, "PAM30");
2456 if (gopen == -1 || gext == -1) {
2457 gopen = (gopen == -1) ? 9: gopen;
2458 gext = (gext == -1) ? 1: gext;
2459 }
2460 break;
2461 case 2:
2462 sprintf(mbuf, "PAM70");
2463 if (gopen == -1 || gext == -1) {
2464 gopen = (gopen == -1) ? 10: gopen;
2465 gext = (gext == -1) ? 1: gext;
2466 }
2467 break;
2468 case 3:
2469 sprintf(mbuf, "BLOSUM45");
2470 if (gopen == -1 || gext == -1) {
2471 gopen = (gopen == -1) ? 15: gopen;
2472 gext = (gext == -1) ? 2: gext;
2473 }
2474 break;
2475 case 4:
2476 sprintf(mbuf, "BLOSUM80");
2477 if (gopen == -1 || gext == -1) {
2478 gopen = (gopen == -1) ? 10: gopen;
2479 gext = (gext == -1) ? 1: gext;
2480 }
2481 break;
2482 default:
2483 sprintf(mbuf, "BLOSUM62");
2484 if (gopen == -1 || gext == -1) {
2485 gopen = (gopen == -1) ? 11: gopen;
2486 gext = (gext == -1) ? 1: gext;
2487 }
2488 break;
2489 }
2490
2491 StringCpy(options->matrix, mbuf);
2492
2493 /* Use one-pass initial word search with low threshold, except for
2494 tblastx. Also do not use multiple hits approach for finding initial
2495 seeds. */
2496 options->two_pass_method = FALSE;
2497 if (StrCmp(progname, "tblastx")) {
2498 options->threshold_second = 9;
2499 options->multiple_hits_only = FALSE;
2500 }
2501 } else {
2502 if (gopen == -1)
2503 gopen = (is_megablast) ? 0 : 5;
2504 if (gext == -1)
2505 gext = (is_megablast) ? 0 : 2;
2506 options->penalty = ms;
2507 options->reward = ma;
2508 }
2509
2510 if ((chptr = WWWGetValueByName(theInfo->info, "DBLEN")) != NULL) {
2511 options->db_length = atoi(chptr);
2512 } else {
2513 if((chptr = WWWGetValueByName(theInfo->info, "DB")) != NULL &&
2514 StrCmp(chptr, "=pdb") == 0) {
2515 dbname = StringSave("pdb");
2516 } else {
2517 dbname = StringSave("nr");
2518 }
2519
2520 rdfp = readdb_new(dbname, is_aa2);
2521 if (rdfp) {
2522 readdb_get_totals_ex(rdfp, &options->db_length, &dbseq_num, TRUE);
2523 readdb_destruct(rdfp);
2524 } else
2525 options->db_length = (is_aa2 ? NR_SIZE_AA : NR_SIZE_NA);
2526
2527 MemFree(dbname);
2528 }
2529
2530 options->wordsize = wordsize;
2531 if (is_megablast) {
2532 options->cutoff_s2 = options->wordsize*options->reward;
2533 options->cutoff_s = (options->wordsize + 4)*options->reward;
2534 }
2535 options->gap_open = gopen;
2536 options->gap_extend = gext;
2537 options->gap_x_dropoff = dropoff;
2538 options->expect_value = expect;
2539 options->filter = filter;
2540 if (filter == 1) {
2541 options->filter_string = StringSave("T");
2542 }
2543
2544 if (!StringICmp(progname, "tblastn") ||
2545 !(StringICmp(progname, "tblastx")))
2546 options->db_genetic_code = 1;
2547
2548 if (query_bsp == NULL || subject_bsp == NULL) {
2549 error_msg = "Please enter the sequences";
2550
2551 Blast2SeqMainPage(error_msg, seq1EntryData, seq1EntryData,
2552 NULL, NULL, NULL, is_prot, options, mtrx,
2553 from, to, ffrom, tto, filter, pagecount);
2554 }
2555
2556 error_return = NULL;
2557 if (is_aa1 && is_na1)
2558 BlastConstructErrorMessage(NULL, "First sequence must be protein for this program", 1, &error_return);
2559 if (is_aa2 && is_na2)
2560 BlastConstructErrorMessage(NULL, "Second sequence must be protein for this program", 1, &error_return);
2561 if (!is_aa1 && !is_na1)
2562 BlastConstructErrorMessage(NULL, "First sequence must be nucleotide for this program", 1, &error_return);
2563 if (!is_aa2 && !is_na2)
2564 BlastConstructErrorMessage(NULL, "Second sequence must be nucleotide for this program", 1, &error_return);
2565 if (error_return)
2566
2567 Blast2SeqMainPage(NULL, seq1EntryData, seq2EntryData, NULL,NULL, error_return, is_prot,
2568 options, mtrx, from, to, ffrom, tto, filter, pagecount);
2569
2570 BLASTOptionValidateHTML(options, progname, seq1EntryData, seq2EntryData, NULL, NULL,
2571 is_prot, mtrx, from, to, ffrom, tto, filter,
2572 pagecount);
2573
2574 fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
2575 if (from == 0 && to == 0) {
2576 #ifdef NCBI_INTERNAL_NEW_FORMATTER
2577 ValNodeAddPointer(&slp1, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBest(query_bsp->id, SEQID_GI)));
2578 len1 = query_bsp->length;
2579 #else
2580 ValNodeAddPointer(&slp1, SEQLOC_WHOLE, SeqIdDup(fake_bsp->id));
2581 len1 = fake_bsp->length;
2582 #endif
2583
2584 } else {
2585 sip1 = SeqIntNew();
2586 sip1->from = (from > 0) ? from-1 : 0;
2587 #ifdef NCBI_INTERNAL_NEW_FORMATTER
2588 sip1->to = (to < query_bsp->length) ? to-1 : query_bsp->length-1;
2589 sip1->id = (SeqIdPtr) SeqIdDup (query_bsp->id);
2590 #else
2591 sip1->to = (to < fake_bsp->length) ? to-1 : query_bsp->length-1;
2592 sip1->id = (SeqIdPtr) SeqIdDup (fake_bsp->id);
2593 #endif
2594 sip1->strand = options->strand_option;
2595 ValNodeAddPointer(&slp1, SEQLOC_INT, sip1);
2596 len1 = SeqLocLen(slp1);
2597 }
2598 if (ffrom == 0 && tto == 0) {
2599 ValNodeAddPointer(&slp2, SEQLOC_WHOLE,
2600 SeqIdDup(SeqIdFindBest(subject_bsp->id, SEQID_GI)));
2601 len2 = subject_bsp->length;
2602 } else {
2603 sip2 = SeqIntNew();
2604 sip2->from = (ffrom > 0) ? ffrom-1 : 0;
2605 sip2->to = (tto < subject_bsp->length) ? tto-1 : subject_bsp->length-1;
2606 sip2->id = (SeqIdPtr) SeqIdDup(SeqIdFindBest(subject_bsp->id, SEQID_GI));
2607 sip2->strand = Seq_strand_both;
2608 ValNodeAddPointer(&slp2, SEQLOC_INT, sip2);
2609 len2 = SeqLocLen(slp2);
2610 }
2611 if ((spp = SeqPortNewByLoc(slp1, code1)) == NULL) {
2612 error_msg = "The first sequence location is not valid";
2613
2614 Blast2SeqMainPage(error_msg, seq1EntryData, seq2EntryData, NULL, NULL, NULL, is_prot, options,
2615 mtrx, from, to, ffrom, tto, filter, pagecount);
2616 } else
2617 SeqPortFree(spp);
2618 if ((spp = SeqPortNewByLoc(slp2, code2)) == NULL) {
2619 error_msg = "The second sequence location is not valid";
2620
2621 Blast2SeqMainPage(error_msg, seq1EntryData, seq2EntryData, NULL, NULL, NULL, is_prot, options,
2622 mtrx, from, to, ffrom, tto, filter, pagecount);
2623 } else
2624 SeqPortFree(spp);
2625
2626 other_returns = NULL;
2627
2628 #ifndef BL2SEQ_STANDALONE
2629 loginfo.filename = StringSave("Log/wblast2.log");
2630 loginfo.time_start = 0;
2631 if (((loginfo.user_IP = getenv("PROXIED_IP")) == NULL) &&
2632 ((loginfo.user_IP = getenv("USER_ADDR")) == NULL)) {
2633 loginfo.user_IP = StringSave("255.255.255.255");
2634 }
2635 loginfo.pid = getpid();
2636 loginfo.q_length = len1;
2637 loginfo.s_length = len2;
2638 loginfo.size = 0;
2639
2640 if (!is_megablast)
2641 loginfo.program = StringSave(progname);
2642 else
2643 loginfo.program = StringSave("mblastn");
2644 logmsg(0);
2645 #endif
2646
2647 #ifndef USE_OLD_BLAST
2648 BLAST_SummaryOptionsInit(&s_options);
2649 BLASTOptions2SummaryOptions(options, progname, s_options);
2650 BLAST_TwoSeqLocSets(s_options, slp1, slp2, NULL, &seqalign_arr, &mask, &mask_at_hash,
2651 &s_return);
2652 if (seqalign_arr && seqalign_arr->array)
2653 {
2654 seqalign = seqalign_arr->array[0];
2655 seqalign_arr->array[0] = NULL;
2656 seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
2657 }
2658
2659 if (s_return->error) {
2660 /* Print the error message. */
2661 Blast2SeqMainPage(s_return->error->message, seq1EntryData, seq2EntryData, NULL, NULL, NULL,
2662 is_prot, options, mtrx, from, to, ffrom, tto, filter,
2663 pagecount);
2664 }
2665
2666
2667 if (mask_at_hash) {
2668 /* If masking was done for lookup table only, do not use mask locations
2669 for formatting. */
2670 while (mask) {
2671 SeqLocSetFree(mask->data.ptrvalue);
2672 mask = mask->next;
2673 }
2674 }
2675
2676 BLAST_SummaryOptionsFree(s_options);
2677 #else
2678 seqalign = BlastTwoSequencesByLocEx(slp1, slp2, progname,
2679 options, &other_returns, NULL);
2680 #endif
2681 run_status = WBLAST2_FORMAT;
2682 /* seqalign = BlastTwoSequencesEx(query_bsp, subject_bsp, progname,
2683 options, &other_returns, NULL);*/
2684
2685 if (seqalign == NULL) {
2686 if ((chptr = WWWGetValueByName(theInfo->info, "SEQ")) != NULL) {
2687
2688 MemFree(seq1EntryData);
2689 seq1EntryData = StringSave(chptr);
2690 }
2691 if((chptr = WWWGetValueByName(theInfo->info, "SSEQ")) != NULL) {
2692
2693 MemFree(seq2EntryData);
2694 seq2EntryData = StringSave(chptr);
2695 }
2696 CreateJavaHeadHTML(query_bsp, subject_bsp, from, to, ffrom, tto,
2697 len1, len2, progname);
2698
2699 PrintParam(is_prot, mtrx, ma, ms, options, seq2EntryData, seq1EntryData, NULL, NULL,
2700 query_bsp, subject_bsp, len1, len2, from, to, ffrom, tto, pagecount, view, mask_char, mask_color, cds_translation);
2701 printf("<strong><font color=#0000EE>No significant similarity was found</font></strong>\n");
2702
2703 goto cleanup;
2704 }
2705
2706 #ifdef USE_OLD_BLAST
2707 mask = NULL;
2708 for (vnp=other_returns; vnp; vnp = vnp->next) {
2709 switch (vnp->choice) {
2710 case TXDBINFO:
2711 dbinfo = vnp->data.ptrvalue;
2712 break;
2713 case TXKABLK_NOGAP:
2714 ka_params = vnp->data.ptrvalue;
2715 break;
2716 case TXKABLK_GAP:
2717 ka_gap_params = vnp->data.ptrvalue;
2718 break;
2719 case TXPARAMETERS:
2720 buffer = vnp->data.ptrvalue;
2721 break;
2722 case TXMATRIX:
2723 blast_matrix = vnp->data.ptrvalue;
2724 if (blast_matrix)
2725 txmatrix = BlastMatrixToTxMatrix(blast_matrix);
2726 break;
2727 case SEQLOC_MASKING_NOTSET:
2728 case SEQLOC_MASKING_PLUS1:
2729 case SEQLOC_MASKING_PLUS2:
2730 case SEQLOC_MASKING_PLUS3:
2731 case SEQLOC_MASKING_MINUS1:
2732 case SEQLOC_MASKING_MINUS2:
2733 case SEQLOC_MASKING_MINUS3:
2734 ValNodeAddPointer(&mask, vnp->choice, vnp->data.ptrvalue);
2735 break;
2736 default:
2737 break;
2738 }
2739 }
2740
2741 ValNodeFree(other_returns);
2742 #endif
2743
2744 to = (to >0) ? to : query_bsp->length;
2745 tto = (tto >0) ? tto : subject_bsp->length;
2746 from = (from >0) ? from : 1;
2747 ffrom = (ffrom >0) ? ffrom : 1;
2748
2749 CreateJavaHeadHTML(query_bsp, subject_bsp,
2750 from, to, ffrom, tto, len1, len2, progname);
2751
2752 PrintParam(is_prot, mtrx, ma, ms, options, seq2EntryData, seq1EntryData, NULL, NULL,
2753 query_bsp, subject_bsp, len1, len2, from, to, ffrom, tto, pagecount, view, mask_char, mask_color, cds_translation);
2754
2755 align_type = BlastGetProgramNumber(progname);
2756
2757 x_factor = y_factor = 1;
2758 if (align_type == blast_type_tblastn)
2759 x_factor = 3;
2760 else if (align_type == blast_type_blastx)
2761 y_factor = 3;
2762
2763 x = y = 150;
2764 ll = MAX(len1*x_factor, len2*y_factor);
2765 scalex = (FloatHi) x*x_factor/ll;
2766 scaley = (FloatHi) y*y_factor/ll;
2767 k = 0;
2768 color = 1;
2769 index = 0;
2770
2771 printf("<table>\n");
2772 printf("<tr><td>\n");
2773
2774 /* For tblastx all alignments are in one seqalign; let's separate them */
2775 if (align_type == blast_type_tblastx) {
2776 StdSegPtr nextssp;
2777 SeqAlignPtr last_sap = NULL, head = NULL;
2778 sap = seqalign;
2779 ssp = seqalign->segs;
2780 while (ssp) {
2781 nextssp = ssp->next;
2782 ssp->next = NULL;
2783 seqalign->segs = ssp;
2784 sap = SeqAlignDup(seqalign);
2785 if (last_sap) {
2786 last_sap->next = sap;
2787 last_sap = last_sap->next;
2788 } else
2789 head = last_sap = sap;
2790 ssp = nextssp;
2791 }
2792 SeqAlignFree(seqalign);
2793 seqalign = head;
2794 }
2795
2796 for (sap=seqalign; sap != NULL; sap = sap->next) {
2797 if (sap->segtype > 3) {
2798 sap = sap->segs;
2799 }
2800 if (index%2 == 0) {
2801 color = 1;
2802 } else {
2803 color = 2;
2804 }
2805 if (sap->segtype == SAS_DENDIAG) {
2806 for (ddp = sap->segs, index1=0; ddp; ddp= ddp->next, index1++);
2807 } else if (sap->segtype == SAS_DENSEG) {
2808 dsp = sap->segs;
2809 index1 = dsp->numseg;
2810 } else if (sap->segtype == SAS_STD) {
2811 for (ssp = sap->segs, index1=0; ssp; ssp = ssp->next, index1++);
2812 }
2813
2814 rect = MemNew((2*index1+3)*sizeof(PrymPtr));
2815 rectY = MemNew((2*index1+3)*sizeof(PrymPtr));
2816 k = CreateRectAlign(sap, rect, rectY, scalex, scaley,
2817 len1, len2, color, from, ffrom, to, tto);
2818 DrawRectAlign(rect, k, color, 9, index);
2819 printf("<BR>\n");
2820 DrawRectAlign(rectY, k, color, 9, index);
2821
2822 MemFree(rect);
2823 MemFree(rectY);
2824 printf("<BR><BR>\n");
2825 index++;
2826 }
2827 printf("</td><td width=75></td><td valign=top><b>2</b></td><td>\n");
2828 xx = yy = 150;
2829 scalex = ((FloatHi) xx) / len1;
2830 scaley = ((FloatHi) yy) / len2;
2831
2832 /* printf("<INPUT TYPE=image border=0 SRC=\"bag?");*/
2833 printf("<IMG border=0 SRC=\"bl2bag.cgi?");
2834 xx = scalex * len1;
2835 yy = scaley * len2;
2836 printf("%s", NumToGun(xx));
2837 printf("%s", NumToGun(yy));
2838 printf("CCCCCC-");
2839
2840 printf("rD9D9D9(");
2841
2842 for (slp=qslp; slp != NULL; slp=slp->next) {
2843 if (slp->choice == SEQLOC_PACKED_INT) {
2844 for (sl = slp->data.ptrvalue; sl; sl=sl->next) {
2845 if (SeqLocStart(sl) >= from || SeqLocStop(sl) <= to) {
2846 continue;
2847 }
2848 x = SeqLocStart(sl) * scalex;
2849 printf("%s", NumToGun(x));
2850 printf("00");
2851 y = SeqLocStop(sl) * scaley;
2852 printf("%s", NumToGun(x));
2853 printf("%s", NumToGun(y));
2854 }
2855 } else {
2856 if (SeqLocStart(slp) >= from || SeqLocStop(slp) <= to) {
2857 continue;
2858 }
2859 x = SeqLocStart(slp) * scalex;
2860 printf("%s", NumToGun(x));
2861 printf("00");
2862 y = SeqLocStop(slp) * scaley;
2863 printf("%s", NumToGun(x));
2864 printf("%s", NumToGun(y));
2865 }
2866 }
2867 printf(")");
2868
2869 hsat = NULL;
2870 index = 0;
2871
2872 for (sap=seqalign; sap != NULL; sap = sapnext) {
2873 if (sap->segtype > 3) {
2874 sap = sap->segs;
2875 }
2876 sapnext = sap->next;
2877 sap->next = NULL;
2878 sat = SeqAnnotNew();
2879 sat->type = 2;
2880 sat->data = sap;
2881 hsat = tie_next_annot(hsat, sat);
2882
2883 if (index%2 == 0) {
2884 printf("3399CC3(");
2885 } else {
2886 printf("8dC7CC3(");
2887 }
2888 if (sap->segtype == SAS_DENDIAG) {
2889 ddp = sap->segs;
2890 while (ddp != NULL) {
2891 if (ddp->strands[0] != Seq_strand_minus) {
2892 x = ddp->starts [0] * scalex;
2893 } else {
2894 x = (ddp->starts [0] + ddp->len) * scalex;
2895 }
2896 if (ddp->strands[1] != Seq_strand_minus) {
2897 y = yy - ddp->starts [1] * scaley;
2898 } else {
2899 y = yy - (ddp->starts [1] + ddp->len) * scaley;
2900 }
2901 printf("%s", NumToGun(x));
2902 printf("%s", NumToGun(y));
2903 if (ddp->strands[0] != Seq_strand_minus) {
2904 x = (ddp->starts [0] + ddp->len) * scalex;
2905 } else {
2906 x = ddp->starts [0] * scalex;
2907 }
2908 if (ddp->strands[1] != Seq_strand_minus) {
2909 y = yy - (ddp->starts [1] + ddp->len) * scaley;
2910 } else {
2911 y = yy - ddp->starts [1] * scaley;
2912 }
2913 printf("%s", NumToGun(x));
2914 printf("%s", NumToGun(y));
2915 ddp = ddp->next;
2916 }
2917 printf(")");
2918 } else if (sap->segtype == SAS_DENSEG) {
2919 dsp = sap->segs;
2920 for (index1=0; index1 < dsp->numseg; index1++) {
2921 i2 = 2 * index1;
2922 if (dsp->starts[i2] != -1 && dsp->starts[i2+1] != -1) {
2923 if (dsp->strands[i2] != Seq_strand_minus) {
2924 x = (dsp->starts[i2] - from) * scalex;
2925 } else {
2926 x = (dsp->starts[i2] + dsp->lens[index1] - from) * scalex;
2927 }
2928 if (dsp->strands[i2+1] != Seq_strand_minus) {
2929 y = yy - (dsp->starts[i2+1] -ffrom) * scaley;
2930 } else {
2931 y = yy - (dsp->starts[i2+1] + dsp->lens[index1] - ffrom) * scaley;
2932 }
2933 printf("%s", NumToGun(x));
2934 printf("%s", NumToGun(y));
2935 if (dsp->strands[i2] != Seq_strand_minus) {
2936 x = (dsp->starts[i2] + dsp->lens[index1] - from) * scalex;
2937 } else {
2938 x = (dsp->starts[i2] - from) * scalex;
2939 }
2940 if (dsp->strands[i2+1] != Seq_strand_minus) {
2941 y = yy - (dsp->starts[i2+1] + dsp->lens[index1] - ffrom) * scaley;
2942 } else {
2943 y = yy - (dsp->starts[i2+1] - ffrom)* scaley;
2944 }
2945 printf("%s", NumToGun(x));
2946 printf("%s", NumToGun(y));
2947 }
2948 }
2949 printf(")");
2950 } else if (sap->segtype == SAS_STD) {
2951 SeqLocPtr tmp_slp;
2952 ssp = sap->segs;
2953 while (ssp) {
2954 tmp_slp = ssp->loc;
2955 if (!tmp_slp || !tmp_slp->next) {
2956 ssp = ssp->next;
2957 continue;
2958 }
2959 if (tmp_slp->choice != SEQLOC_EMPTY &&
2960 tmp_slp->next->choice != SEQLOC_EMPTY) {
2961
2962 if (SeqLocStrand(tmp_slp) != Seq_strand_minus) {
2963 x = (SeqLocStart(tmp_slp) - from) * scalex;
2964 } else {
2965 x = (SeqLocStop(tmp_slp) + 1 - from) * scalex;
2966 }
2967 if (SeqLocStrand(tmp_slp->next) != Seq_strand_minus) {
2968 y = yy - (SeqLocStart(tmp_slp->next) - ffrom) * scaley;
2969 } else {
2970 y = yy - (SeqLocStop(tmp_slp->next) + 1 - ffrom) * scaley;
2971 }
2972 printf("%s", NumToGun(x));
2973 printf("%s", NumToGun(y));
2974 if (SeqLocStrand(tmp_slp) != Seq_strand_minus) {
2975 x = (SeqLocStop(tmp_slp) + 1 - from) * scalex;
2976 } else {
2977 x = (SeqLocStart(tmp_slp) - from) * scalex;
2978 }
2979 if (SeqLocStrand(tmp_slp->next) != Seq_strand_minus) {
2980 y = yy - (SeqLocStop(tmp_slp->next) + 1 - ffrom) * scaley;
2981 } else {
2982 y = yy - (SeqLocStart(tmp_slp->next) - ffrom) * scaley;
2983 }
2984 printf("%s", NumToGun(x));
2985 printf("%s", NumToGun(y));
2986 }
2987 ssp = ssp->next;
2988 }
2989 printf(")");
2990 }
2991
2992 index++;
2993 }
2994
2995 #ifndef BL2SEQ_STANDALONE
2996 loginfo.size = index;
2997 #endif
2998
2999 printf("\">\n");
3000 printf("</td><td valign=bottom><b>1</b></td></tr></table>\n");
3001 printf("<BR>");
3002
3003 color = 1;
3004 index = 0;
3005 printf("<font color=#FF0000>NOTE:</font>Bitscore and expect value are calculated based on the size of the nr database.<BR><BR>\n");
3006
3007 if (align_type == blast_type_blastn)
3008 printf("<font color=#FF0000>NOTE:</font>If protein translation is reversed, please repeat the search with reverse strand of the query sequence.<BR><BR>\n");
3009
3010 for (sat=hsat; sat != NULL; sat=satnext) {
3011 satnext=sat->next;
3012 sat->next = NULL;
3013
3014 x = y = 450;
3015
3016 ll = MAX(len1*x_factor, len2*y_factor);
3017 scalex = (FloatHi) x*x_factor/ll;
3018 scaley = (FloatHi) y*y_factor/ll;
3019 k = 0;
3020
3021 if ((sap = sat->data) != NULL) {
3022 if (index%2 == 0) {
3023 color = 1;
3024 } else {
3025 color = 2;
3026 }
3027 printf("<a name=%d>\n", index);
3028
3029 #ifndef NCBI_INTERNAL_NEW_FORMATTER
3030 PrintOutScore(sap, is_prot, NULL, mask);
3031 #endif
3032 if (sap->segtype == SAS_DENDIAG) {
3033 for (ddp = sap->segs, index1=0; ddp; ddp= ddp->next, index1++);
3034 } else if (sap->segtype == SAS_DENSEG) {
3035 dsp = sap->segs;
3036 index1 = dsp->numseg;
3037 } else if (sap->segtype == SAS_STD) {
3038 ssp = sap->segs;
3039 for (ssp = sap->segs, index1=0; ssp; ssp= ssp->next, index1++);
3040 }
3041 rect = MemNew((2*index1+3)*sizeof(PrymPtr));
3042 rectY = MemNew((2*index1+3)*sizeof(PrymPtr));
3043
3044 k = CreateRectAlign(sap, rect, rectY, scalex, scaley,
3045 len1, len2, color, from, ffrom, to, tto);
3046 DrawRectAlign(rect, k, color, 11, -1);
3047 MemFree(rect);
3048 printf("<BR>\n");
3049 DrawRectAlign(rectY, k, color, 11, -1);
3050 MemFree(rectY);
3051 printf("<BR><BR>\n");
3052 index++;
3053 }
3054
3055 txoption = 0;
3056 txoption += TXALIGN_COMPRESS;
3057 txoption += TXALIGN_END_NUM;
3058 txoption += TXALIGN_SHOW_GI;
3059 txoption += TXALIGN_MATRIX_VAL;
3060 txoption += TXALIGN_SHOW_QS;
3061
3062 if (StringICmp(progname, "blastx") == 0)
3063 txoption += TXALIGN_BLASTX_SPECIAL;
3064
3065 txoption += TXALIGN_HTML;
3066
3067 AddAlignInfoToSeqAnnot(sat, align_type);
3068 #ifdef NCBI_INTERNAL_NEW_FORMATTER
3069 fprintf(stdout, "<pre>");
3070 DisplayAlign((SeqAlignPtr)sat->data, 60, query_bsp, subject_bsp,
3071 progname, txmatrix, mask, mask_char, mask_color,
3072 cds_translation, view, stdout);
3073 fprintf(stdout, "</pre>");
3074 #else
3075 ShowTextAlignFromAnnot(sat, 60, stdout, NULL, NULL,
3076 txoption, txmatrix, mask, NULL);
3077 #endif
3078 }
3079
3080 cleanup:
3081
3082
3083
3084 MemFree(seq1EntryData);
3085 MemFree(seq2EntryData);
3086
3087 SeqLocSetFree(slp1);
3088 SeqLocSetFree(slp2);
3089 if (accessionOrGi_1 && *accessionOrGi_1 != NULLB)
3090 BioseqUnlock(query_bsp);
3091 else
3092 SeqEntryFree(query_sep);
3093 if (accessionOrGi_2 && *accessionOrGi_2 != NULLB)
3094 BioseqUnlock(subject_bsp);
3095 else
3096 SeqEntryFree(subject_sep);
3097
3098 BlastTimeFillStructure(&time_keeper);
3099 printf("<pre>\n");
3100 fprintf(stdout, "CPU time: %8.2f user secs.\t%8.2f sys. "
3101 "secs\t%8.2f total secs.\n\n",
3102 time_keeper.user, time_keeper.system, time_keeper.total);
3103 init_buff();
3104 /*PrintDbReport(dbinfo, 70, stdout);*/
3105
3106 blast_matrix = BLAST_MatrixDestruct(blast_matrix);
3107 if (txmatrix)
3108 txmatrix = TxMatrixDestruct(txmatrix);
3109 dbinfo = TxDfDbInfoDestruct(dbinfo);
3110 #ifndef USE_OLD_BLAST
3111 if (s_return) {
3112 if (s_return->ka_params) {
3113 PrintKAParameters(s_return->ka_params->Lambda, s_return->ka_params->K,
3114 s_return->ka_params->H, 70, stdout, FALSE);
3115 }
3116 if (s_return->ka_params_gap) {
3117 PrintKAParameters(s_return->ka_params_gap->Lambda,
3118 s_return->ka_params_gap->K,
3119 s_return->ka_params_gap->H, 70, stdout, TRUE);
3120 }
3121 BlastProgram2Number(progname, &program_number);
3122 buffer = Blast_GetParametersBuffer(program_number, s_return);
3123 PrintTildeSepLines(buffer, 70, stdout);
3124 sfree(buffer);
3125 Blast_SummaryReturnFree(s_return);
3126 }
3127 #else
3128 if (ka_params) {
3129 PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H,
3130 70, stdout, FALSE);
3131 MemFree(ka_params);
3132 }
3133 if (ka_gap_params) {
3134 PrintKAParameters(ka_gap_params->Lambda, ka_gap_params->K,
3135 ka_gap_params->H, 70, stdout, TRUE);
3136 MemFree(ka_gap_params);
3137 }
3138 PrintTildeSepLines(buffer, 70, stdout);
3139 MemFree(buffer);
3140 free_buff();
3141 #endif
3142 mask_head = mask;
3143 while (mask) {
3144 SeqLocSetFree(mask->data.ptrvalue);
3145 mask = mask->next;
3146 }
3147 ValNodeFree(mask_head);
3148
3149 CreateTailHTML();
3150 run_status = WBLAST2_DONE;
3151 options = BLASTOptionDelete(options);
3152 WWWInfoFree(theInfo->info);
3153 MemFree(progname);
3154 MemFree(theInfo);
3155 #ifndef BL2SEQ_STANDALONE
3156 /* Connection thread not spawned for the WWW server version */
3157 NlmThreadJoin(connection_thread, &thrstat);
3158 logmsg(0);
3159 LogInfoFree();
3160 #endif
3161 return 0;
3162 }
3163
3164