1 /* $Id: elecpcr.c,v 6.9 2002/07/25 14:15:09 beloslyu Exp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: $RCSfile: elecpcr.c,v $
27 *
28 * Author: Sergei Shavirin
29 *
30 * Version Creation Date: 12/19/1996
31 *
32 * $Revision: 6.9 $
33 *
34 * File Description:
35 * Main program for WWW and Command-Line Electronic PCR
36 *
37 * $Log: elecpcr.c,v $
38 * Revision 6.9 2002/07/25 14:15:09 beloslyu
39 * change www3 to www
40 *
41 * Revision 6.8 1999/07/27 18:43:04 shavirin
42 * Fixed problems found on PC NT computer.
43 *
44 * Revision 6.7 1999/02/24 16:49:23 kans
45 * use accutils copy of IS_ntdb_accession and IS_protdb_accession
46 *
47 * Revision 6.6 1999/02/23 17:28:11 shavirin
48 * Replaced IS_ accession verification functions by it's "relaxed"
49 * version
50 *
51 * Revision 6.5 1998/05/22 19:19:28 shavirin
52 * Updated for porting to sunweb. Fixed bug with filtering of
53 * organisms.
54 *
55 * Revision 6.4 1998/05/01 19:22:29 shavirin
56 * Improved WWW page formating
57 *
58 * Revision 6.3 1998/05/01 18:22:58 shavirin
59 * Next revision
60 *
61 * Revision 6.2 1998/04/28 19:32:13 shavirin
62 * Fixed minor bugs detected by purify
63 *
64 * Revision 6.1 1997/09/03 17:19:45 shavirin
65 * Added some debug information and sequence prevented from freeing
66 *
67 * Revision 6.0 1997/08/25 18:19:31 madden
68 * Revision changed to 6.0
69 *
70 * Revision 1.3 1997/05/23 15:32:18 shavirin
71 * Added ability to use local database files (for remote users)
72 *
73 * Revision 1.2 1997/05/14 19:12:41 shavirin
74 * Added define LF 10
75 *
76 * Revision 1.1 1996/12/19 20:54:35 shavirin
77 * Initial revision
78 *
79 *
80 * ==========================================================================
81 */
82
83 #include <ncbi.h>
84 #include <ncbiwww.h>
85 #include <accentr.h>
86 #include <accutils.h>
87 #include <stsutil.h>
88
89 /****************************************************************************/
90 /* DEFINES */
91 /****************************************************************************/
92
93 #define BG_COLOR "#FDF5E6"
94 #define PRIMER1_COLOR "#00BF00"
95 #define PRIMER2_COLOR "red"
96
97 #define LogFile "wwwsts.log"
98
99 #define FASTA_IN 1
100 #define ACC_IN 2
101
102 #define MINSEQLEN 10
103 #define LF 10
104
105 /****************************************************************************/
106 /* TYPEDEFS */
107 /****************************************************************************/
108
109 typedef struct AccList {
110 CharPtr acc;
111 struct AccList *next;
112 } AccList, PNTR AccListPtr;
113
114 typedef struct StsPar {
115 CharPtr sequence;
116 Boolean html;
117 Int4 organism;
118 CharPtr orgname;
119 Int4 intype;
120 Boolean detailed;
121 CharPtr sts_db_name;
122 CharPtr org_db_name;
123 CharPtr map_db_name;
124 } StsPar, PNTR StsParPtr;
125
126 /****************************************************************************/
127 /* STATIC FUNCTIONS */
128 /****************************************************************************/
129
130 static Boolean GetSTSEntry(StsResultPtr result,
131 FILE *fd, Boolean detailed);
132
133 static StsResultPtr PrintSTSHeader(StsResultPtr result, CharPtr label,
134 Boolean detailed, Boolean html);
135 static Boolean PrintSTSDetailes(StsResultPtr result, Boolean html);
136
137 static AccListPtr GetAccList(CharPtr buffer, Int4Ptr TotalItems);
138 static void WWWSendSTSPage(CharPtr WWWSequence,
139 Boolean NetscapeOK);
140
141 static StsParPtr GetStsSearchParam(void);
142 static FastaSeqPtr NextFastaFromBuff(CharPtr p, CharPtr PNTR next);
143 static Int4 FilterNucSequence(CharPtr p);
144
145 static StsParPtr StsParNew(void);
146 static void StsParFree(StsParPtr param);
147 static StsParPtr STSReadCommandLine(void);
148
149 #define STS_WWW_DATABASE "/web/public/htdocs/STS/DB/sts.db"
150 #define ORG_WWW_DATABASE "/web/public/htdocs/STS/DB/org.db"
151 #define MAP_WWW_DATABASE "/web/public/htdocs/STS/DB/sts.map"
152
IS_NOT_accession(CharPtr word)153 static Boolean IS_NOT_accession (CharPtr word)
154 {
155 Int4 len, i;
156
157 if(word == NULL)
158 return TRUE;
159
160 if((len = StringLen(word)) == 0)
161 return TRUE;
162
163 /* Testing, that this is 6 length accession */
164
165 if(len == 6 && isalpha(word[0])) {
166 for(i = 1; i < len; i++) {
167 if(!isdigit(word[i]))
168 break;
169 }
170
171 if (i == len) return FALSE; /* This is accession */
172 }
173
174 if(len == 8 && isalpha(word[0]) && isalpha(word[1])) {
175 for(i = 2; i < len; i++) {
176 if(!isdigit(word[i])) {
177 break;
178 }
179 }
180 if (i == len) return FALSE; /* This is accession */
181 }
182
183 return TRUE;
184 }
185
Main(void)186 Int2 Main(void)
187 {
188 StsParPtr stsp;
189 Int4 NumBadChar, StsCount;
190 CharPtr inbuff;
191 FastaSeqPtr fseq;
192 CharPtr NextChar;
193 StsResultPtr result= NULL, newresult= NULL, tmpresult=NULL;
194 Int4 NumAcc;
195 AccListPtr MainAccList, AccTmp;
196 STSDataPtr sts_data;
197 STSDbNamesPtr db_name = NULL;
198
199 putenv("USER=STSSearch");
200
201 if((stsp = GetStsSearchParam()) == NULL) {
202 printf("Error getting Search data. Exiting ...\n");
203 exit(1);
204 }
205
206 if(stsp->html) {
207
208 printf("Content-type: text/html\r\n\r\n");
209 printf("<HTML>"
210 "<BODY bgcolor=\"%s\" link=\"blue\" vlink=#4500A0>"
211 "<TITLE> STS Match Program </TITLE>"
212 "<map name=TitleMap>"
213 "<area shape=rect coords=3,1,43,21 "
214 "href=http://www.ncbi.nlm.nih.gov>"
215 "<area shape=rect coords=381,1,426,21 "
216 "href=http://www.ncbi.nlm.nih.gov/dbSTS/> "
217 "<area shape=rect coords=431,1,476,21 "
218 "href=http://www.ncbi.nlm.nih.gov/Entrez> "
219 "<area shape=rect coords=481,1,500,21 "
220 "href=/STS/About_ePCR.html>"
221 "</map>"
222 "<IMG SRC=\"/STS/pcr_res.gif\" BORDER=0 USEMAP=#TitleMap "
223 "HEIGHT=22 WIDTH=500><BR> <BR>\n", BG_COLOR);
224 fflush(stdout);
225 }
226
227 #ifdef WWW_VERSION
228 db_name = MemNew(sizeof(STSDbNames));
229 db_name->sts_db_name = StringSave(STS_WWW_DATABASE);
230 db_name->sts_map_name = StringSave(MAP_WWW_DATABASE);
231 db_name->sts_org_name = StringSave(ORG_WWW_DATABASE);
232 #else
233 if(stsp->sts_db_name != NULL ||
234 stsp->map_db_name != NULL || stsp->org_db_name) {
235 db_name = MemNew(sizeof(STSDbNames));
236 db_name->sts_db_name = StringSave(stsp->sts_db_name);
237 db_name->sts_map_name = StringSave(stsp->map_db_name);
238 db_name->sts_org_name = StringSave(stsp->org_db_name);
239 }
240 #endif
241
242 if((StsCount = InitSTSSearch_r(db_name, &sts_data)) < 0) {
243 printf("Cannot initiate STS Search ... Exiting ...\n");
244 exit(1);
245 } else if(stsp->html) {
246 printf("<b>STS database initialized with %d sequences, "
247 "please wait for results ...</b><BR><HR><PRE>", StsCount
248 );
249 fflush(stdout);
250 }
251
252 stsp->organism = STSGetOrganismIndex(sts_data, stsp->orgname);
253
254 if(db_name != NULL) {
255 MemFree(db_name->sts_db_name);
256 MemFree(db_name->sts_map_name);
257 MemFree(db_name);
258 }
259
260 if(stsp->intype != FASTA_IN) {
261
262 if (! EntrezInit("STSSearch", FALSE, NULL)) {
263 printf("Cannot initialize Entrez<BR>");
264 }
265
266 EntrezBioseqFetchEnable("STSSearch", TRUE);
267 }
268
269 printf(
270 "Location of STS GenBank\n"
271 "within query accession\n"
272 "sequence dbSTS id number Chromosome STS marker name\n"
273 "================ ======== ========= ========== ==============================\n\n");
274
275 fflush(stdout);
276
277 if(stsp->intype == FASTA_IN) {
278 if(stsp->sequence[0] == NULLB) {
279 printf("No sequence present in the search query.\n");
280 exit(1);
281 } else if(StringLen(inbuff = stsp->sequence) < MINSEQLEN) {
282 printf("Length of entered sequence too small to start search\n");
283 exit(1);
284 }
285 while(inbuff != NULL) {
286 fseq = NextFastaFromBuff(inbuff, &NextChar);
287 inbuff = NextChar;
288 if((NumBadChar = FilterNucSequence(fseq->seq)) < 0) {
289 printf("Error filtering nucleotide sequence\n");
290 exit(1);
291 } else if (NumBadChar >0 ) {
292 printf("<b>WARNING!!!</b> %d bad characters found "
293 "in the input sequence\n\n",
294 NumBadChar);
295 }
296
297 if(!STSSearch_r(sts_data, fseq->seq, stsp->organism, &newresult)) {
298 printf("Error in STS Search. Exiting ...\n");
299 return 1;
300 }
301
302 if(newresult != NULL) {
303 tmpresult = newresult;
304 newresult = PrintSTSHeader(newresult, fseq->label,
305 stsp->detailed, stsp->html);
306 newresult->next = result;
307 result = tmpresult;
308 } else {
309 printf("%s\n Did not return any hits ...\n\n",
310 fseq->label);
311 fflush(stdout);
312 }
313 MemFree(fseq->label);
314 MemFree(fseq->seq);
315 MemFree(fseq);
316 } /* while (inbuff != NULL) */
317 } else { /* Type = Accession or GI */
318
319 /* Here first we will fetch sequence from Entrez */
320
321 MainAccList = GetAccList(stsp->sequence, &NumAcc);
322
323 if(NumAcc == 0) {
324 printf("<b>ERROR:</b> No valid gi/accessions found in input.<HR>");
325 exit(1);
326 }
327
328 /* printf("Retrieved %d valid accessions/gis\n", NumAcc); */
329
330 for (AccTmp = MainAccList; AccTmp != NULL; AccTmp = AccTmp->next) {
331 if((fseq = AccessionToFasta(AccTmp->acc)) == NULL) {
332 printf("<b>ERROR:</b> No record was found "
333 "for %s - skipping..</b>",
334 AccTmp->acc);
335 continue;
336 }
337
338 if(!STSSearch_r(sts_data, fseq->seq, stsp->organism, &newresult)) {
339 printf("Error in STS Search. Exiting ...\n");
340 return 1;
341 }
342 if(newresult != NULL) {
343 tmpresult = newresult;
344 newresult = PrintSTSHeader(newresult, fseq->label,
345 stsp->detailed, stsp->html);
346 newresult->next = result;
347 result = tmpresult;
348 } else {
349 printf("%s\n Did not return any hits ...\n\n",
350 fseq->label);
351 fflush(stdout);
352 }
353
354 MemFree(fseq->seq);
355 MemFree(fseq->label);
356 MemFree(fseq);
357 }
358 }
359
360 if(stsp->html)
361 printf("<HR>\n");
362
363 /* If detailed output requested */
364
365 if(result && stsp->detailed)
366 PrintSTSDetailes(result, stsp->html);
367
368 if(stsp->detailed || stsp->intype != FASTA_IN) {
369 EntrezBioseqFetchDisable();
370 EntrezFini();
371 }
372
373 StsResultFree(result);
374
375 STSDataFree(sts_data);
376
377 StsParFree(stsp);
378 return 0;
379 }
/*
 * StsPrintPad (helper of PrintSTSHeader)
 *
 * Pads a table cell with blanks: "used" characters were already written
 * (as reported by printf), the cell is "width" characters wide.
 */
static void StsPrintPad(int used, int width)
{
    if (used < 0)           /* printf reported an output error */
        used = 0;

    for (; used < width; used++)
        putchar(' ');
}

/*
 * PrintSTSHeader
 *
 * Prints the query label followed by one short table line for every
 * element of the hit list.
 *
 * result   - head of the hit list
 * label    - title of the query sequence
 * detailed - TRUE when a detailed section will follow; together with html
 *            it turns the location into a link to that section
 * html     - TRUE for HTML output, FALSE for plain text
 *
 * Returns the LAST element of the list so that the caller can chain
 * another list behind it, or NULL when result is NULL.
 *
 * The cells are written directly with printf and its return value is
 * used to compute the padding, so no fixed-size scratch buffer (and no
 * unbounded sprintf into it) is needed.  The column widths of the HTML
 * variant include the markup, which is invisible in the browser.
 */
static StsResultPtr PrintSTSHeader(StsResultPtr result, CharPtr label,
                                   Boolean detailed, Boolean html)
{
    Boolean linked = (Boolean) (detailed && html);
    int     written;
    CharPtr chrom;

    if (result == NULL)
        return NULL;

    printf("%s\n", label);

    for (;;) {

        /* Location within the query.  The anchor format must stay
           identical to the <a name=...> written by PrintSTSDetailes() */

        if (linked) {
            written = printf(" <a href=#%08s>%d..%d</a> ",
                             result->acc,
                             result->pos, result->pos + result->real_len - 1);
        } else {
            written = printf(" %d..%d ",
                             result->pos, result->pos + result->real_len - 1);
        }
        StsPrintPad(written, linked ? 41 : 19);

        /* dbSTS id */

        if (html) {
            written = printf("<a href=\"http://www2.ncbi.nlm.nih.gov/cgi-bin/"
                             "birx_by_acc?dbsts+%d\">%d</a>",
                             result->id_sts, result->id_sts);
        } else {
            written = printf("%d", result->id_sts);
        }
        StsPrintPad(written, html ? 86 : 10);

        /* GenBank accession */

        if (html) {
            written = printf("<a href=\"http://www.ncbi.nlm.nih.gov/"
                             "htbin-post/Entrez/query?form=6&dopt=g&db=n&"
                             "uid=%s\">%s</a>",
                             result->acc, result->acc);
        } else {
            written = printf("%s", result->acc);
        }
        StsPrintPad(written, html ? 112 : 14);

        /* Chromosome; "0" is shown as a blank */

        chrom = StringCmp(result->chrom, "0") ? result->chrom : " ";
        written = printf("%s", chrom);
        StsPrintPad(written, 9);

        /* STS marker name ends the line */

        printf("%s\n", result->sts_name);
        fflush(stdout);

        if (result->next == NULL)
            break;
        result = result->next;
    }

    printf("\n");
    fflush(stdout);
    return result;
}
468
/*
 * PrintSTSDetailes
 *
 * Prints the detailed section: for every hit a summary (ids, organism,
 * primers, location, amplicon sizes) followed by the query sequence in
 * lines of 60 residues grouped by 10.  In HTML mode both primer regions
 * are coloured.
 *
 * result - head of the hit list (may be NULL: only the title is printed)
 * html   - TRUE for HTML output, FALSE for plain text
 *
 * Always returns TRUE.
 */
static Boolean PrintSTSDetailes(StsResultPtr result, Boolean html)
{
    register Int4 i;
    Int4    len1, len2, len_tot, len_end;
    Int2    open_color;     /* 0 - none, 1 - primer 1 colour, 2 - primer 2 colour */
    Boolean forward;
    CharPtr chrom;

    printf("\n%sDetailed information...%s",
           html ? "<b>" : "",
           html ? "</b>\n\n" : "");
    fflush(stdout);

    for (; result != NULL; result = result->next) {

        forward = (Boolean) (result->exp_len > 0);
        chrom   = StringCmp(result->chrom, "0") ? result->chrom : " ";

        if (html) {
            /* The anchor format must stay identical to the link written
               by PrintSTSHeader() */
            printf("<HR><a name=%08s>\n<h3>%s</h3>\n\n</a>",
                   result->acc, result->sts_name);

            /* End of the location is pos + real_len - 1, the same value
               that the short table and the text variant print.
               labs() returns long, hence %ld */
            printf("dbSTS id: <a href=\"http://www2.ncbi.nlm.nih.gov/cgi-bin/"
                   "birx_by_acc?dbsts+%d\">%d</a>, "
                   "GenBank Accession: "
                   "<a href=\"http://www.ncbi.nlm.nih.gov/"
                   "htbin-post/Entrez/query?form=6&dopt=g&db=n&"
                   "uid=%s\">%s</a><BR>Organism: %s\n"
                   "Primer1: <FONT color=\"%s\">%s</FONT>\n"
                   "Primer2: <FONT color=\"%s\">%s</FONT>\n"
                   "STS location: %d..%d Chromosome: %s\n"
                   "Expected amplicon size: %ld, Observed amplicon size: %d\n",
                   result->id_sts, result->id_sts,
                   result->acc, result->acc,
                   result->org,
                   PRIMER1_COLOR, result->pr1,
                   PRIMER2_COLOR, result->pr2,
                   result->pos, result->pos + result->real_len - 1,
                   chrom,
                   labs((long) result->exp_len), result->real_len);
        } else {
            /* Only one %s in the format: the surplus sts_name argument
               of the old call was silently ignored and is dropped */
            printf("\n\n\n-------+= %s =+-------\n\n",
                   result->acc);

            printf("dbSTS id: %d, GenBank Accession: %s\n"
                   "Organism: %s\n"
                   "Primer1: %s\n"
                   "Primer2: %s\n"
                   "STS location: %d..%d Chromosome: %s\n"
                   "Expected amplicon size: %ld, Observed amplicon size: %d\n",
                   result->id_sts,
                   result->acc,
                   result->org,
                   result->pr1,
                   result->pr2,
                   result->pos, result->pos + result->real_len - 1,
                   chrom,
                   labs((long) result->exp_len), result->real_len);
        }

        fflush(stdout);
        printf("Primers match in %s orientation\n",
               forward ? "forward" : "backward");

        printf("\n%sQuery sequence:%s\n\n",
               html ? "<b>" : "",
               html ? "</b>" : "");

        fflush(stdout);

        /* Offsets inside result->sequence:
           start .. len1     first coloured primer region
           len2  .. len_end  second coloured primer region */

        len1    = StringLen(result->pr1) + result->start;
        len2    = result->real_len - StringLen(result->pr2) + result->start;
        len_tot = StringLen(result->sequence);
        len_end = result->real_len + result->start;

        printf("%6d ", result->pos - result->start);
        fflush(stdout);
        open_color = 0;

        for (i = 0; i < len_tot; i++) {

            if (i != 0 && (i % 10) == 0)
                printf(" ");

            if (html) {
                if (i == result->start) {
                    printf("<FONT color=\"%s\">",
                           forward ? PRIMER1_COLOR : PRIMER2_COLOR);
                    open_color = (Int2) (forward ? 1 : 2);
                }
                if (i == len_end) {
                    printf("</FONT>");
                    open_color = 0;
                }
                if (i == len1) {
                    printf("</FONT>");
                    open_color = 0;
                }
                if (i == len2) {
                    printf("<FONT color=\"%s\">",
                           forward ? PRIMER2_COLOR : PRIMER1_COLOR);
                    open_color = (Int2) (forward ? 2 : 1);
                }
            }

            if (i != 0 && (i % 60) == 0) {
                /* New output line; an open colour is closed before the
                   position label and reopened after it */
                if (open_color > 0 && html)
                    printf("</FONT>\n%6d <FONT color=\"%s\">",
                           result->pos - result->start + i,
                           open_color == 1 ? PRIMER1_COLOR : PRIMER2_COLOR);
                else
                    printf("\n%6d ", result->pos - result->start + i);
            }

            printf("%c", result->sequence[i]);
        }

        /* The amplicon may end exactly at the end of the stored sequence;
           do not leave the colour tag open in that case */
        if (html && open_color > 0)
            printf("</FONT>");

        fflush(stdout);
    }

    if (html) {
        printf("</PRE>");
        for (i = 0; i < 25; i++)
            printf("<BR>");
    } else {
        printf("\n\n\n");
    }

    fflush(stdout);
    return TRUE;
}
GetAccList(CharPtr buffer,Int4Ptr TotalItems)597 static AccListPtr GetAccList(CharPtr buffer, Int4Ptr TotalItems)
598 {
599 Char TmpBuff[256];
600 register Int4 i, j, k;
601 Int4 FileLen = 0;
602 AccListPtr acclist = NULL;
603 AccListPtr acclistTmp, acclistlast;
604 Int4 NumInvalid = 0;
605
606 *TotalItems = 0;
607
608 if(buffer == NULL || buffer[0] == NULLB)
609 return NULL;
610
611 MemSet(TmpBuff, '\0', sizeof(TmpBuff));
612
613 FileLen = StringLen(buffer);
614
615 for(i = 0; i < FileLen; i++) {
616
617 if(NumInvalid > 10) {
618 printf("<b>ERROR :</b> Too many invalid Gi/Accession numbers <BR>");
619 return NULL;
620 }
621
622 if(isspace(buffer[i])) /* Rolling spaces */
623 continue;
624
625 j= 0;
626 while (!isspace(buffer[i]) && j < 10 && i < FileLen) {
627 TmpBuff[j] = buffer[i];
628 j++; i++;
629 }
630 TmpBuff[j] = NULLB;
631
632 /* Now validating accession/gi */
633
634 for(k =0; k < j; k++) {
635 if(!isdigit(TmpBuff[k])) {
636 break;
637 }
638 }
639 if(k != j) {
640 if(IS_NOT_accession(TmpBuff)){
641 printf("<b>WARNING:</b> Gi/Accession \"%s\" "
642 "is not valid<BR>",
643 TmpBuff);
644 NumInvalid++;
645 continue;
646 }
647 }
648
649 /* It we come here - we got valid text ID */
650
651 if(acclist == NULL) { /* first element */
652 acclist = (AccListPtr) MemNew(sizeof(AccList));
653 acclistTmp = acclist;
654 acclistTmp->acc = StringSave(TmpBuff);
655 acclistTmp->next = NULL;
656 acclistlast=acclistTmp;
657 *TotalItems = *TotalItems +1;
658 } else {
659 acclistTmp = (AccListPtr) MemNew(sizeof(AccList));
660 acclistlast->next = acclistTmp;
661 acclistTmp->acc = StringSave(TmpBuff);
662 acclistTmp->next = NULL;
663 acclistlast = acclistTmp;
664 *TotalItems = *TotalItems +1;
665 }
666 }
667 return acclist;
668 }
669
/*
 * StsPrintHtmlEscaped (helper of WWWSendSTSPage)
 *
 * Writes text to stdout with the HTML special characters replaced by
 * entities, so that user-supplied data cannot break out of the element
 * it is placed in.  NULL prints nothing.
 */
static void StsPrintHtmlEscaped(CharPtr text)
{
    if (text == NULL)
        return;

    for (; *text != NULLB; text++) {
        switch (*text) {
        case '&':
            fputs("&amp;", stdout);
            break;
        case '<':
            fputs("&lt;", stdout);
            break;
        case '>':
            fputs("&gt;", stdout);
            break;
        case '"':
            fputs("&quot;", stdout);
            break;
        default:
            putchar(*text);
            break;
        }
    }
}

/*
 * WWWSendSTSPage
 *
 * Writes the complete query form (HTTP header included) to stdout.
 *
 * WWWSequence - text to pre-load into the SEQUENCE text area, may be NULL
 * NetscapeOK  - TRUE when the browser can upload files: the form then
 *               uses multipart/form-data and offers a file input
 *
 * Terminates the process with exit(1) when the organism table cannot be
 * obtained.
 */
static void WWWSendSTSPage(CharPtr WWWSequence,
                           Boolean NetscapeOK)
{
    Int4            i;
    STSOrgPtr PNTR  OrgTable;
    CharPtr         server_port;
    CharPtr         script_name;

    /* getenv() returns NULL for unset variables; passing NULL to %s is
       undefined, so fall back to printable defaults */

    if ((server_port = getenv("SERVER_PORT")) == NULL)
        server_port = "80";
    if ((script_name = getenv("SCRIPT_NAME")) == NULL)
        script_name = "NOT_SET";

    printf("Content-type: text/html\r\n\r\n");
    printf("<HTML>"
           "<BODY bgcolor=\"%s\">"
           "<TITLE> STS Match Program </TITLE>\n"
           "<map name=TitleMap>\n"
           "<area shape=rect coords=3,1,43,21 "
           "href=http://www.ncbi.nlm.nih.gov>\n"
           "<area shape=rect coords=381,1,426,21 "
           "href=http://www.ncbi.nlm.nih.gov:%s/dbSTS/> "
           "<area shape=rect coords=431,1,476,21 "
           "href=http://www.ncbi.nlm.nih.gov/Entrez> "
           "<area shape=rect coords=481,1,500,21 "
           "href=/STS/About_ePCR.html>"
           "</map>\n"
           "<IMG SRC=\"/STS/pcr.gif\" BORDER=0 USEMAP=#TitleMap "
           "HEIGHT=22 WIDTH=500>\n", BG_COLOR, server_port);

    if ((OrgTable = STSGetOrgTable()) == NULL) {
        printf("ERROR: Cannot initiate Organism index "
               "Exiting...\n");
        exit(1);
    }

    printf("<FORM ACTION=\"%s/result \" METHOD=\"POST\" "
           "NAME=\"STSTOOL\" %s>\n",
           script_name,
           NetscapeOK ? "ENCTYPE=\"multipart/form-data\"" : "");

    printf("PCR-based sequence tagged sites (STSs) "
           "have been used as landmarks for "
           "construction of various types of genomic maps. "
           "Using \"electronic PCR\" (e-PCR), "
           "these sites can be detected in DNA sequences, "
           "potentially allowing their map "
           "locations to be determined. <BR><BR>\n");

    printf("Enter here your input data as \n"
           "<select name = \"INPUT_TYPE\"> \n"
           "<option> Sequence in FASTA format \n"
           "<option> Accession or GI \n"
           "</select><BR><BR> \n"
           "<textarea name=\"SEQUENCE\" rows=6 cols=60>");
    StsPrintHtmlEscaped(WWWSequence);       /* echoed user data */
    printf("</textarea> \n");

    if (NetscapeOK)
        printf("<BR>Or load your input data from file: "
               "<INPUT TYPE=\"file\" NAME=\"SEQFILE\"> \n");

    printf("<BR><BR>\n");

    printf("Retrieve STS from "
           "<select name = \"ORGANISM\"> \n");

    printf(" <option> All Organisms ");
    for (i = 0; i < MAXORGNUM && OrgTable[i] != NULL; i++) {
        printf(" <option> %s (%d) \n",
               OrgTable[i]->string, OrgTable[i]->num);
    }

    printf("</select><BR><BR>\n");

    printf("Print detailed information "
           "<INPUT TYPE=\"checkbox\" NAME=\"DETAILED\" CHECKED><BR><BR>\n");

    printf("<INPUT TYPE=\"submit\"> \n"
           "<INPUT TYPE=\"reset\" VALUE=\"Clear input\"> \n"
           "<HR></FORM>\n");

    printf("%c<ADDRESS>\n", LF);
    printf("Comments and suggestions to:"
           "< \n<a href=\"mailto:info@ncbi.nlm.nih.gov\">"
           "info@ncbi.nlm.nih.gov\n"
           "</a> > <BR> Credits to: \n"
           "<a href=\"mailto:shavirin@ncbi.nlm.nih.gov\">"
           "Sergei B. Shavirin</a>, \n"
           "<!-- <a href=\"http://www.ncbi.nlm.nih.gov/STS/shavirin.html\">"
           "Sergei B. Shavirin</a> -->\n"
           "<a href=\"mailto:schuler@ncbi.nlm.nih.gov\">"
           "Greg Schuler</a> and \n"
           "<a href=\"mailto:carolyn@ncbi.nlm.nih.gov\">"
           "Carolyn Tolstoshev</a>\n");
    printf("</ADDRESS>\n");
}
762 #define NUMARGS 9
763
764 Args pcr_args[NUMARGS] = {
765 {"Input type\n"
766 " 0 - FASTA file \n"
767 " 1 - List of Gi/Accession numbers",
768 NULL, NULL,NULL,FALSE,'t',ARG_INT,0.0,0,NULL},
769 {"Format of output \n"
770 " 0 - Text \n"
771 " 1 - HTML ",
772 "0", NULL,NULL,FALSE,'f',ARG_INT, 0.0,0,NULL},
773 { "File with FASTA entries or GI/Accession numbers",
774 "stdin", NULL, NULL, TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
775 {"Print detailed information \n"
776 " 0 - Short \n"
777 " 1 - Detailed ",
778 "0", NULL,NULL,TRUE,'d',ARG_INT, 0.0,0,NULL},
779 { "GI/Accession number for quick look",
780 NULL, NULL, NULL, TRUE, 'u', ARG_STRING, 0.0, 0, NULL},
781 {"STS database file name:",
782 NULL, NULL,NULL,TRUE,'s',ARG_FILE_IN, 0.0,0,NULL},
783 {"ORG database file name:",
784 NULL, NULL,NULL,TRUE,'o',ARG_FILE_IN, 0.0,0,NULL},
785 {"MAP database file name:",
786 NULL, NULL,NULL,TRUE,'m',ARG_FILE_IN, 0.0,0,NULL},
787 {"Logfile name:",
788 "elecpcr.log", NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL}
789 };
790
STSReadCommandLine(void)791 static StsParPtr STSReadCommandLine(void)
792 {
793 StsParPtr stsp;
794 FILE *fd;
795
796 if((stsp = StsParNew()) == NULL)
797 return NULL;
798
799 if ( !GetArgs ("elecpcr", NUMARGS, pcr_args) ) {
800 exit(1);
801 }
802 if (!ErrSetLog (pcr_args[8].strvalue)) {
803 ErrShow();
804 } else {
805 ErrSetOpts (ERR_CONTINUE, ERR_LOG_ON);
806 }
807
808 stsp->sts_db_name = StringSave(pcr_args[5].strvalue);
809 stsp->org_db_name = StringSave(pcr_args[6].strvalue);
810 stsp->map_db_name = StringSave(pcr_args[7].strvalue);
811
812 if(pcr_args[1].intvalue == 1)
813 stsp->html = TRUE;
814 else
815 stsp->html = FALSE;
816
817 if(pcr_args[3].intvalue == 1)
818 stsp->detailed = TRUE;
819 else
820 stsp->detailed = FALSE;
821
822 if(pcr_args[0].intvalue == 1)
823 stsp->intype = ACC_IN;
824 else
825 stsp->intype = FASTA_IN;
826
827 if(pcr_args[4].strvalue != NULL) { /* Quick look ? */
828 stsp->sequence = pcr_args[4].strvalue;
829 } else { /* Reading from file by default */
830 if((fd = FileOpen(pcr_args[2].strvalue, "rb")) == NULL)
831 return NULL;
832 if((stsp->sequence = WWWReadFileInMemory(fd, 0, TRUE)) == NULL)
833 return NULL;
834 }
835 return stsp;
836 }
837
838
GetStsSearchParam(void)839 static StsParPtr GetStsSearchParam(void)
840 {
841
842 WWWInfoPtr info;
843 CharPtr chptr;
844 StsParPtr stsp;
845 FILE *fd;
846 time_t time_now;
847 CharPtr TimeNowStr;
848 WWWErrorCode error;
849 Boolean SequenceOK = FALSE;
850
851 if((fd = FileOpen(LogFile, "a")) == NULL) {
852 if((fd = FileOpen("/tmp/wwwsts.log", "a")) == NULL) {
853 printf("Cannot open logfile. Exiting...\n ");
854 exit(1);
855 }
856 }
857
858 if((error = WWWReadPosting(&info)) != WWWErrOk)
859 return NULL;
860
861 /* First check type of call to the program */
862
863 if(WWWGetMethod(info) == COMMAND_LINE) {
864
865 /* Reading command line and create StsPar structure */
866 WWWInfoFree(info);
867 return(STSReadCommandLine());
868 }
869
870 if(WWWGetMethod(info) == WWW_GET) {
871 if(WWWGetNumEntries(info) == 0) {
872 time_now = time(NULL);
873 TimeNowStr = ctime(&time_now);
874 TimeNowStr[24] = '\0';
875 fprintf(fd, "\n%s|%s|%s|%s|%d",
876 TimeNowStr, WWWGetAddress(info),
877 WWWGetHost(info), WWWGetAgent(info),
878 0);
879 fclose(fd);
880 WWWSendSTSPage(NULL, (Boolean)(WWWGetBrowser(info) == NETSCAPE));
881 exit(1);
882 } else { /* Here is processing of link to Electronic PCR */
883
884 if((stsp = StsParNew()) == NULL)
885 return NULL;
886
887 /* Sequence Accession or GI */
888
889 stsp->intype = ACC_IN;
890
891 if((chptr = WWWGetValueByName(info, "ID")) != NULL) {
892 stsp->sequence = StringSave(chptr);
893 } else {
894 time_now = time(NULL);
895 TimeNowStr = ctime(&time_now);
896 TimeNowStr[24] = '\0';
897 fprintf(fd, "\n%s|%s|%s|%s|%d",
898 TimeNowStr, WWWGetAddress(info),
899 WWWGetHost(info), WWWGetAgent(info),
900 0);
901 fclose(fd);
902 WWWSendSTSPage(NULL, (Boolean)(WWWGetBrowser(info) == NETSCAPE));
903 exit(1);
904 }
905
906 /* Detailed or short */
907
908 if((chptr = WWWGetValueByName(info, "D")) != NULL) {
909 if(!StringICmp(chptr, "0") ||
910 !StringICmp(chptr, "FALSE") ||
911 !StringICmp(chptr, "OFF") ||
912 !StringICmp(chptr, "NO") )
913 stsp->detailed = FALSE;
914 } else {
915 stsp->detailed = TRUE;
916 }
917
918 /* Text or HTML */
919
920 if((chptr = WWWGetValueByName(info, "F")) != NULL) {
921 if(!StringICmp(chptr, "0") ||
922 !StringICmp(chptr, "FALSE") ||
923 !StringICmp(chptr, "OFF") ||
924 !StringICmp(chptr, "TEXT") ||
925 !StringICmp(chptr, "NO") ) {
926 stsp->html = FALSE;
927 printf("Content-type: text/html\r\n\r\n");
928 printf("<BODY bgcolor=\"%s\"><PRE>", BG_COLOR);
929 printf("<TITLE> STS Match Program </TITLE>");
930 }
931 } else {
932 stsp->html = TRUE;
933 }
934 return stsp;
935
936 } /* Link to Electronic PCR proccesing */
937 } /* Method == GET */
938
939 if((stsp = StsParNew()) == NULL)
940 return NULL;
941
942 #ifdef TEST
943 printf("Content-type: text/html\r\n\r\n");
944 info_data = (WWWInfoDataPtr) info;
945 for(i=0; i < info_data->num_entries; i++) {
946 printf("%s : %s\n <BR>%c",
947 info_data->entries[i]->name,
948 info_data->entries[i]->val, LF);
949 }
950 exit(1);
951 #endif
952
953 if((chptr = WWWGetValueByName(info, "ORGANISM")) != NULL)
954 stsp->orgname = StringSave(chptr);
955
956 if((chptr = WWWGetValueByName(info, "SEQUENCE")) != NULL) {
957 if(chptr[0] != NULLB)
958 SequenceOK = TRUE;
959 stsp->sequence = StringSave(chptr);
960 }
961 if(((chptr = WWWGetValueByName(info, "SEQFILE")) != NULL) &&
962 !SequenceOK) {
963 stsp->sequence = StringSave(chptr);
964 }
965
966 if((chptr = WWWGetValueByName(info, "INPUT_TYPE")) != NULL) {
967 if(StringStr(chptr, "Sequence")) {
968 stsp->intype = FASTA_IN;
969 } else if(StringStr(chptr, "Accession")) {
970 stsp->intype = ACC_IN;
971 }
972 }
973
974 if((chptr = WWWGetValueByName(info, "DETAILED")) != NULL) {
975 stsp->detailed = TRUE;
976 }
977
978 time_now = time(NULL);
979 TimeNowStr = ctime(&time_now);
980 TimeNowStr[24] = '\0';
981 fprintf(fd, "\n%s|%s|%s|%s|%d|%d|%d",
982 TimeNowStr, WWWGetAddress(info),
983 WWWGetHost(info), WWWGetAgent(info),
984 stsp->intype,
985 stsp->organism,
986 stsp->detailed);
987 fclose(fd);
988
989 WWWInfoFree(info);
990 return stsp;
991 }
StsParNew(void)992 static StsParPtr StsParNew(void)
993 {
994 StsParPtr param;
995 param = (StsParPtr) MemNew(sizeof(StsPar));
996 param->sequence = NULL;
997 param->detailed = FALSE;
998 param->intype = FASTA_IN;
999 param->html = TRUE;
1000
1001 return param;
1002 }
1003
/*
 * StsParFree
 *
 * Releases a structure created by StsParNew() together with all strings
 * it owns.  A NULL argument is ignored.
 */
static void StsParFree(StsParPtr param)
{
    if (param == NULL)
        return;

    MemFree(param->sequence);
    MemFree(param->sts_db_name);
    MemFree(param->org_db_name);
    MemFree(param->map_db_name);
    MemFree(param->orgname);

    MemFree(param);
}
1013
NextFastaFromBuff(CharPtr p,CharPtr PNTR next)1014 static FastaSeqPtr NextFastaFromBuff(CharPtr p, CharPtr PNTR next)
1015 {
1016 FastaSeqPtr fseq;
1017 CharPtr t;
1018 CharPtr str;
1019 Int4 i, SeqSize = INIT_BUFF_SIZE;
1020
1021 *next = NULL;
1022
1023 if(p == NULL | p[0] == NULLB)
1024 return NULL;
1025
1026 t = p;
1027
1028 while (isspace(*t)) /* Rolling spaces */
1029 *t++;
1030
1031 fseq = (FastaSeqPtr) MemNew(sizeof(FastaSeq));
1032 fseq->label = NULL;
1033 fseq->seq = NULL;
1034
1035 switch (*t) {
1036 case NULLB:
1037 return NULL;
1038 case '>':
1039 /* Reading label */
1040 str = (CharPtr) MemNew(SeqSize + 5);
1041 str[0] = NULLB;
1042 for(i=0; *t != NULLB; i++) {
1043 if((str[i] = *t) == '\n' || *t == '\r')
1044 break;
1045 if (i > SeqSize) {
1046 SeqSize = i + INIT_BUFF_SIZE;
1047 str = (CharPtr) Realloc(str, SeqSize + 5);
1048 }
1049 *t++;
1050 }
1051 str[i] = NULLB;
1052 fseq->label = StringSave(str);
1053 MemFree(str);
1054
1055 /* Reading sequence */
1056
1057 while (isspace(*t)) /* Rolling spaces */
1058 *t++;
1059 str = (CharPtr) MemNew(SeqSize + 5);
1060 str[0] = NULLB;
1061
1062 for(i=0; *t != NULLB; i++) {
1063 if((str[i] = *t) == '>') {
1064 *next = t;
1065 break;
1066 }
1067
1068 if (i > SeqSize) {
1069 SeqSize = i + INIT_BUFF_SIZE;
1070 str = (CharPtr) Realloc(str, SeqSize + 5);
1071 }
1072 *t++;
1073 }
1074 str[i] = NULLB;
1075 fseq->seq = StringSave(str);
1076 MemFree(str);
1077 break;
1078 default:
1079
1080 /* Reading sequence */
1081
1082 while (isspace(*t)) /* Rolling spaces */
1083 *t++;
1084 str = (CharPtr) MemNew(SeqSize + 5);
1085 str[0] = NULLB;
1086
1087 for(i=0; *t != NULLB; i++) {
1088 if((str[i] = *t) == '>') {
1089 *next = t;
1090 break;
1091 }
1092 if (i > SeqSize) {
1093 SeqSize = i + INIT_BUFF_SIZE;
1094 str = (CharPtr) Realloc(str, SeqSize + 5);
1095 }
1096 *t++;
1097 }
1098 str[i] = NULLB;
1099 fseq->seq = StringSave(str);
1100 MemFree(str);
1101 break;
1102 }
1103 if(fseq->label == NULL)
1104 fseq->label = StringSave(">Your sequence");
1105 return fseq;
1106 }
1107
FilterNucSequence(CharPtr p)1108 static Int4 FilterNucSequence(CharPtr p)
1109 {
1110 CharPtr t, s;
1111 Int4 len, i =0;
1112 Int4 NumBadChar=0;
1113 len = StringLen(p);
1114
1115 s = p;
1116
1117 for(t = p; i < len; i++){
1118 *p = toupper(*p);
1119 if (isalpha(*p)) {
1120 *t = *p;
1121 *t++;
1122 } else {
1123 if(!isspace(*p)) {
1124 NumBadChar++;
1125 }
1126 }
1127 *p++;
1128 }
1129 *t = NULLB;
1130 p = s;
1131 return NumBadChar;
1132 }
1133