1 /* $Id: elecpcr.c,v 6.9 2002/07/25 14:15:09 beloslyu Exp $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  $RCSfile: elecpcr.c,v $
27 *
28 * Author:  Sergei Shavirin
29 *
30 * Version Creation Date: 12/19/1996
31 *
32 * $Revision: 6.9 $
33 *
34 * File Description:
35 *         Main program for WWW and Command-Line Electronic PCR
36 *
37 * $Log: elecpcr.c,v $
38 * Revision 6.9  2002/07/25 14:15:09  beloslyu
39 * change www3 to www
40 *
41 * Revision 6.8  1999/07/27 18:43:04  shavirin
42 * Fixed problems found on PC NT computer.
43 *
44 * Revision 6.7  1999/02/24 16:49:23  kans
45 * use accutils copy of IS_ntdb_accession and IS_protdb_accession
46 *
47 * Revision 6.6  1999/02/23 17:28:11  shavirin
48 * Replaced IS_ accession verification functions by it's "relaxed"
49 * version
50 *
51 * Revision 6.5  1998/05/22 19:19:28  shavirin
52 * Updated for porting to sunweb. Fixed bug with filtering of
53 * organisms.
54 *
55 * Revision 6.4  1998/05/01 19:22:29  shavirin
56 * Improved WWW page formating
57 *
58 * Revision 6.3  1998/05/01 18:22:58  shavirin
59 * Next revision
60 *
61 * Revision 6.2  1998/04/28 19:32:13  shavirin
62 * Fixed minor bugs detected by purify
63 *
64 * Revision 6.1  1997/09/03 17:19:45  shavirin
65 * Added some debug information and sequence prevented from freeing
66 *
67 * Revision 6.0  1997/08/25 18:19:31  madden
68 * Revision changed to 6.0
69 *
70 * Revision 1.3  1997/05/23 15:32:18  shavirin
71 * Added ability to use local database files (for remote users)
72 *
73 * Revision 1.2  1997/05/14 19:12:41  shavirin
74 * Added define LF 10
75 *
76 * Revision 1.1  1996/12/19 20:54:35  shavirin
77 * Initial revision
78 *
79 *
80 * ==========================================================================
81 */
82 
83 #include <ncbi.h>
84 #include <ncbiwww.h>
85 #include <accentr.h>
86 #include <accutils.h>
87 #include <stsutil.h>
88 
89 /****************************************************************************/
90 /* DEFINES */
91 /****************************************************************************/
92 
93 #define BG_COLOR      "#FDF5E6"
94 #define PRIMER1_COLOR "#00BF00"
95 #define PRIMER2_COLOR "red"
96 
97 #define LogFile       "wwwsts.log"
98 
99 #define FASTA_IN 1
100 #define ACC_IN   2
101 
102 #define MINSEQLEN 10
103 #define LF 10
104 
105 /****************************************************************************/
106 /* TYPEDEFS */
107 /****************************************************************************/
108 
109 typedef struct AccList {
110     CharPtr acc;
111     struct AccList *next;
112 } AccList, PNTR AccListPtr;
113 
114 typedef struct StsPar {
115     CharPtr sequence;
116     Boolean html;
117     Int4 organism;
118     CharPtr orgname;
119     Int4 intype;
120     Boolean detailed;
121     CharPtr sts_db_name;
122     CharPtr org_db_name;
123     CharPtr map_db_name;
124 } StsPar, PNTR StsParPtr;
125 
126 /****************************************************************************/
127 /* STATIC FUNCTIONS */
128 /****************************************************************************/
129 
130 static Boolean GetSTSEntry(StsResultPtr result,
131                            FILE *fd, Boolean detailed);
132 
133 static StsResultPtr PrintSTSHeader(StsResultPtr result, CharPtr label,
134                                    Boolean detailed, Boolean html);
135 static Boolean PrintSTSDetailes(StsResultPtr result, Boolean html);
136 
137 static AccListPtr GetAccList(CharPtr buffer, Int4Ptr TotalItems);
138 static void WWWSendSTSPage(CharPtr WWWSequence,
139                            Boolean NetscapeOK);
140 
141 static StsParPtr GetStsSearchParam(void);
142 static FastaSeqPtr NextFastaFromBuff(CharPtr p, CharPtr PNTR next);
143 static Int4 FilterNucSequence(CharPtr p);
144 
145 static StsParPtr StsParNew(void);
146 static void StsParFree(StsParPtr param);
147 static StsParPtr STSReadCommandLine(void);
148 
149 #define STS_WWW_DATABASE  "/web/public/htdocs/STS/DB/sts.db"
150 #define ORG_WWW_DATABASE  "/web/public/htdocs/STS/DB/org.db"
151 #define MAP_WWW_DATABASE  "/web/public/htdocs/STS/DB/sts.map"
152 
IS_NOT_accession(CharPtr word)153 static Boolean IS_NOT_accession (CharPtr word)
154 {
155     Int4 len, i;
156 
157     if(word == NULL)
158         return TRUE;
159 
160     if((len = StringLen(word)) == 0)
161         return TRUE;
162 
163     /* Testing, that this is 6 length accession */
164 
165     if(len == 6 && isalpha(word[0])) {
166         for(i = 1; i < len; i++) {
167             if(!isdigit(word[i]))
168                 break;
169         }
170 
171         if (i == len) return FALSE; /* This is accession */
172     }
173 
174     if(len == 8 && isalpha(word[0])  && isalpha(word[1])) {
175         for(i = 2; i < len; i++) {
176             if(!isdigit(word[i])) {
177                 break;
178             }
179         }
180         if (i == len) return FALSE; /* This is accession */
181     }
182 
183     return TRUE;
184 }
185 
Main(void)186 Int2 Main(void)
187 {
188     StsParPtr stsp;
189     Int4 NumBadChar, StsCount;
190     CharPtr inbuff;
191     FastaSeqPtr fseq;
192     CharPtr NextChar;
193     StsResultPtr result= NULL, newresult= NULL, tmpresult=NULL;
194     Int4 NumAcc;
195     AccListPtr MainAccList, AccTmp;
196     STSDataPtr sts_data;
197     STSDbNamesPtr db_name = NULL;
198 
199     putenv("USER=STSSearch");
200 
201     if((stsp = GetStsSearchParam()) == NULL) {
202         printf("Error getting Search data. Exiting ...\n");
203         exit(1);
204     }
205 
206     if(stsp->html) {
207 
208         printf("Content-type: text/html\r\n\r\n");
209         printf("<HTML>"
210                "<BODY bgcolor=\"%s\" link=\"blue\" vlink=#4500A0>"
211                "<TITLE> STS Match Program </TITLE>"
212                "<map name=TitleMap>"
213                "<area shape=rect coords=3,1,43,21 "
214                "href=http://www.ncbi.nlm.nih.gov>"
215                "<area shape=rect coords=381,1,426,21 "
216                "href=http://www.ncbi.nlm.nih.gov/dbSTS/> "
217                "<area shape=rect coords=431,1,476,21 "
218                "href=http://www.ncbi.nlm.nih.gov/Entrez> "
219                "<area shape=rect coords=481,1,500,21 "
220                "href=/STS/About_ePCR.html>"
221                "</map>"
222                "<IMG SRC=\"/STS/pcr_res.gif\" BORDER=0 USEMAP=#TitleMap "
223                "HEIGHT=22 WIDTH=500><BR>&nbsp;<BR>\n", BG_COLOR);
224         fflush(stdout);
225     }
226 
227 #ifdef WWW_VERSION
228     db_name = MemNew(sizeof(STSDbNames));
229     db_name->sts_db_name = StringSave(STS_WWW_DATABASE);
230     db_name->sts_map_name = StringSave(MAP_WWW_DATABASE);
231     db_name->sts_org_name = StringSave(ORG_WWW_DATABASE);
232 #else
233     if(stsp->sts_db_name != NULL ||
234        stsp->map_db_name != NULL || stsp->org_db_name) {
235         db_name = MemNew(sizeof(STSDbNames));
236         db_name->sts_db_name = StringSave(stsp->sts_db_name);
237         db_name->sts_map_name = StringSave(stsp->map_db_name);
238         db_name->sts_org_name = StringSave(stsp->org_db_name);
239     }
240 #endif
241 
242     if((StsCount = InitSTSSearch_r(db_name, &sts_data)) < 0) {
243         printf("Cannot initiate STS Search ... Exiting ...\n");
244         exit(1);
245     } else if(stsp->html) {
246         printf("<b>STS database initialized with %d sequences, "
247                "please wait for results ...</b><BR><HR><PRE>", StsCount
248                );
249         fflush(stdout);
250     }
251 
252     stsp->organism = STSGetOrganismIndex(sts_data, stsp->orgname);
253 
254     if(db_name != NULL) {
255         MemFree(db_name->sts_db_name);
256         MemFree(db_name->sts_map_name);
257         MemFree(db_name);
258     }
259 
260     if(stsp->intype != FASTA_IN) {
261 
262         if (! EntrezInit("STSSearch", FALSE, NULL)) {
263             printf("Cannot initialize Entrez<BR>");
264         }
265 
266         EntrezBioseqFetchEnable("STSSearch", TRUE);
267     }
268 
269     printf(
270 "Location of STS              GenBank\n"
271 "within query                 accession\n"
272 "sequence          dbSTS id   number    Chromosome  STS marker name\n"
273 "================  ========  =========  ==========  ==============================\n\n");
274 
275     fflush(stdout);
276 
277     if(stsp->intype == FASTA_IN) {
278         if(stsp->sequence[0] == NULLB) {
279             printf("No sequence present in the search query.\n");
280             exit(1);
281         } else if(StringLen(inbuff = stsp->sequence) < MINSEQLEN) {
282             printf("Length of entered sequence too small to start search\n");
283             exit(1);
284         }
285         while(inbuff != NULL) {
286             fseq = NextFastaFromBuff(inbuff, &NextChar);
287             inbuff = NextChar;
288             if((NumBadChar = FilterNucSequence(fseq->seq)) < 0) {
289                 printf("Error filtering nucleotide sequence\n");
290                 exit(1);
291             } else if (NumBadChar >0 ) {
292                 printf("<b>WARNING!!!</b> %d bad characters found "
293                        "in the input sequence\n\n",
294                        NumBadChar);
295             }
296 
297             if(!STSSearch_r(sts_data, fseq->seq, stsp->organism, &newresult)) {
298                 printf("Error in STS Search. Exiting ...\n");
299                 return 1;
300             }
301 
302             if(newresult != NULL) {
303                 tmpresult = newresult;
304                 newresult = PrintSTSHeader(newresult, fseq->label,
305                                            stsp->detailed, stsp->html);
306                 newresult->next = result;
307                 result = tmpresult;
308             } else {
309                 printf("%s\n Did not return any hits ...\n\n",
310                        fseq->label);
311                 fflush(stdout);
312             }
313             MemFree(fseq->label);
314             MemFree(fseq->seq);
315             MemFree(fseq);
316         } /* while (inbuff != NULL) */
317     } else  {  /* Type = Accession or GI */
318 
319         /* Here first we will fetch sequence from Entrez */
320 
321         MainAccList = GetAccList(stsp->sequence, &NumAcc);
322 
323         if(NumAcc == 0) {
324             printf("<b>ERROR:</b> No valid gi/accessions found in input.<HR>");
325             exit(1);
326         }
327 
328         /*    printf("Retrieved %d valid accessions/gis\n", NumAcc);  */
329 
330         for (AccTmp = MainAccList; AccTmp != NULL; AccTmp = AccTmp->next) {
331             if((fseq = AccessionToFasta(AccTmp->acc)) == NULL) {
332                 printf("<b>ERROR:</b> No record was found "
333                        "for %s - skipping..</b>",
334                        AccTmp->acc);
335                 continue;
336             }
337 
338             if(!STSSearch_r(sts_data, fseq->seq, stsp->organism, &newresult)) {
339                 printf("Error in STS Search. Exiting ...\n");
340                 return 1;
341             }
342             if(newresult != NULL) {
343                 tmpresult = newresult;
344                 newresult = PrintSTSHeader(newresult, fseq->label,
345                                            stsp->detailed, stsp->html);
346                 newresult->next = result;
347                 result = tmpresult;
348             } else {
349                 printf("%s\n Did not return any hits ...\n\n",
350                        fseq->label);
351                 fflush(stdout);
352             }
353 
354             MemFree(fseq->seq);
355             MemFree(fseq->label);
356             MemFree(fseq);
357         }
358     }
359 
360     if(stsp->html)
361         printf("<HR>\n");
362 
363     /* If detailed output requested */
364 
365     if(result && stsp->detailed)
366         PrintSTSDetailes(result, stsp->html);
367 
368     if(stsp->detailed || stsp->intype != FASTA_IN) {
369         EntrezBioseqFetchDisable();
370         EntrezFini();
371     }
372 
373     StsResultFree(result);
374 
375     STSDataFree(sts_data);
376 
377     StsParFree(stsp);
378     return 0;
379 }
PrintSTSHeader(StsResultPtr result,CharPtr label,Boolean detailed,Boolean html)380 static StsResultPtr PrintSTSHeader(StsResultPtr result, CharPtr label,
381                                    Boolean detailed, Boolean html)
382 {
383   Char TmpBuff[1024];
384   Int4 tmplen;
385   register Int4 i;
386 
387   printf("%s\n", label);
388 
389   while (TRUE) {
390 
391     /* Now printing short header information line */
392 
393     if(detailed && html) {
394       sprintf(TmpBuff, " <a href=#%08s>%d..%d</a> ",
395               result->acc,
396               result->pos,  result->pos+result->real_len-1
397               );
398     } else {
399       sprintf(TmpBuff, " %d..%d ",
400               result->pos,  result->pos+result->real_len-1
401               );
402     }
403     printf("%s", TmpBuff);
404 
405     if(detailed && html)
406       tmplen = 41 - StringLen(TmpBuff);
407     else
408       tmplen = 19 - StringLen(TmpBuff);
409     for(i =0; i < tmplen; i++)
410       printf(" ");
411 
412     if(html) {
413       sprintf(TmpBuff, "<a href=\"http://www2.ncbi.nlm.nih.gov/cgi-bin/"
414               "birx_by_acc?dbsts+%d\">%d</a>",
415               result->id_sts, result->id_sts);
416     } else {
417       sprintf(TmpBuff, "%d", result->id_sts);
418     }
419 
420     printf("%s", TmpBuff);
421     if(html)
422       tmplen = 86 - StringLen(TmpBuff);
423     else
424       tmplen = 10 - StringLen(TmpBuff);
425 
426     for(i =0; i < tmplen; i++)
427       printf(" ");
428 
429     if(html) {
430       sprintf(TmpBuff, "<a href=\"http://www.ncbi.nlm.nih.gov/"
431               "htbin-post/Entrez/query?form=6&dopt=g&db=n&"
432               "uid=%s\">%s</a>",
433               result->acc, result->acc);
434     } else {
435       sprintf(TmpBuff, "%s", result->acc);
436     }
437 
438     printf("%s", TmpBuff);
439 
440     if(html)
441       tmplen = 112 - StringLen(TmpBuff);
442     else
443       tmplen = 14 - StringLen(TmpBuff);
444 
445     for(i =0; i < tmplen; i++)
446       printf(" ");
447 
448     sprintf(TmpBuff, "%s",
449             StringCmp(result->chrom, "0") ? result->chrom : " ");
450 
451     printf("%s", TmpBuff);
452 
453     tmplen = 9 - StringLen(TmpBuff);
454     for(i =0; i < tmplen; i++)
455       printf(" ");
456 
457     printf("%s\n", result->sts_name);
458 
459     fflush(stdout);
460     if(result->next == NULL)
461       break;
462     result = result->next;
463   }
464   printf("\n");
465   fflush(stdout);
466   return result;
467 }
468 
/*
 * Prints the detailed section: for every hit of the `result` list the
 * identifiers, organism, both primers, location, amplicon sizes, match
 * orientation and the matched stretch of the query sequence in lines of
 * 60 bases grouped by 10. With `html` set the primers inside the sequence
 * are coloured and each hit starts with a named anchor.
 *
 * Always returns TRUE.
 *
 * Changes against the previous version:
 *  - the HTML variant printed the end of the STS location one position
 *    too far (pos+real_len); it now prints pos+real_len-1 like the text
 *    variant and like the summary table;
 *  - the expected amplicon size went through labs(), whose long result
 *    did not match the "%d" conversion; the absolute value is now kept
 *    in the same integer type as the observed size;
 *  - the text banner passed an argument that its format never consumed;
 *  - a colour still open at the end of the sequence is now closed.
 */
static Boolean PrintSTSDetailes(StsResultPtr result, Boolean html)
{
  Int4 i;
  Int4 len1, len2, len_tot, len_end;
  Int4 first_pos, last_pos, exp_abs;
  Int2 ColorSet;          /* 0 - no colour open, 1 - primer 1, 2 - primer 2 */
  CharPtr chrom;

  printf("\n%sDetailed information...%s",
         html? "<b>"  : "",
         html? "</b>\n\n" : ""
         );
  fflush(stdout);

  while(result) {

    chrom = StringCmp(result->chrom, "0") ? result->chrom : " ";
    last_pos = result->pos + result->real_len - 1;
    exp_abs = result->exp_len < 0 ? -result->exp_len : result->exp_len;

    if(html) {
      /* NOTE(review): the anchor format "%08s" has to stay identical to
         the href written by PrintSTSHeader(). */
      printf("<HR><a name=%08s>\n<h3>%s</h3>\n\n</a>",
             result->acc, result->sts_name);

      printf("dbSTS id: <a href=\"http://www2.ncbi.nlm.nih.gov/cgi-bin/"
             "birx_by_acc?dbsts+%d\">%d</a>, "
             "GenBank Accession: "
             "<a href=\"http://www.ncbi.nlm.nih.gov/"
             "htbin-post/Entrez/query?form=6&dopt=g&db=n&"
             "uid=%s\">%s</a><BR>Organism: %s\n"
             "Primer1: <FONT color=\"%s\">%s</FONT>\n"
             "Primer2: <FONT color=\"%s\">%s</FONT>\n"
             "STS location: %d..%d Chromosome: %s\n"
             "Expected amplicon size: %d, Observed amplicon size: %d\n",
             result->id_sts, result->id_sts,
             result->acc, result->acc,
             result->org,
             PRIMER1_COLOR, result->pr1,
             PRIMER2_COLOR, result->pr2,
             result->pos, last_pos,
             chrom,
             exp_abs, result->real_len
             );
    } else {
      /* NOTE(review): the HTML variant titles the hit with the marker
         name while this banner shows the accession; the printed text is
         kept as it was - confirm which one is intended. */
      printf("\n\n\n-------+= %s =+-------\n\n",
             result->acc);

      printf("dbSTS id: %d, GenBank Accession: %s\n"
             "Organism: %s\n"
             "Primer1: %s\n"
             "Primer2: %s\n"
             "STS location: %d..%d Chromosome: %s\n"
             "Expected amplicon size: %d, Observed amplicon size: %d\n",
             result->id_sts,
             result->acc,
             result->org,
             result->pr1,
             result->pr2,
             result->pos, last_pos,
             chrom,
             exp_abs, result->real_len
             );
    }

    fflush(stdout);
    printf("Primers match in %s orientation\n",
           result->exp_len > 0 ? "forward" : "backward");

    printf("\n%sQuery sequence:%s\n\n",
           html? "<b>"  : "",
           html? "</b>" : ""
           );

    fflush(stdout);

    /* Offsets inside result->sequence: the amplicon starts at
       result->start, the first primer ends at len1, the second primer
       starts at len2 and the amplicon ends at len_end. */
    len1 = StringLen(result->pr1) + result->start;
    len2 = result->real_len - StringLen(result->pr2) + result->start;
    len_tot = StringLen(result->sequence);
    len_end = result->real_len + result->start;

    /* Query coordinate of the first printed base */
    first_pos = result->pos - result->start;

    printf("%6d  ", first_pos);
    fflush(stdout);
    ColorSet = 0;

    for(i = 0; i < len_tot; i++) {

      /* blank between groups of ten bases */
      if(!(i%10) && i)
        printf(" ");

      if(i==result->start && html) {
        printf("<FONT color=\"%s\">",
               result->exp_len > 0 ? PRIMER1_COLOR : PRIMER2_COLOR);
        ColorSet = (Int2)(result->exp_len > 0 ? 1 : 2);
      }
      if(i == len_end && html) {
        printf("</FONT>");
        ColorSet = 0;
      }
      if(i == len1 && html) {
        printf("</FONT>");
        ColorSet = 0;
      }
      if(i == len2 && html) {
        printf("<FONT color=\"%s\">",
               result->exp_len > 0 ? PRIMER2_COLOR : PRIMER1_COLOR);
        ColorSet = (Int2)(result->exp_len > 0 ? 2 : 1);
      }

      /* New line every 60 bases; an open colour is closed before the
         line break and reopened after the coordinate. */
      if(!(i%60) && i) {
        if(ColorSet > 0 && html)
          printf("</FONT>\n%6d  <FONT color=\"%s\">",
                 first_pos + i,
                 ColorSet == 1 ? PRIMER1_COLOR : PRIMER2_COLOR );
        else
          printf("\n%6d  ", first_pos + i );
      }
      putchar(result->sequence[i]);
    }

    /* The sequence may stop inside a coloured primer */
    if(ColorSet > 0 && html)
      printf("</FONT>");

    fflush(stdout);

    result=result->next;
  }

  if(html) {
    printf("</PRE>");
    for(i=0; i<25; i++)
      printf("<BR>");
  } else {
    printf("\n\n\n");
  }

  fflush(stdout);
  return TRUE;
}
GetAccList(CharPtr buffer,Int4Ptr TotalItems)597 static AccListPtr GetAccList(CharPtr buffer, Int4Ptr TotalItems)
598 {
599   Char TmpBuff[256];
600   register Int4 i, j, k;
601   Int4 FileLen = 0;
602   AccListPtr acclist = NULL;
603   AccListPtr acclistTmp, acclistlast;
604   Int4 NumInvalid = 0;
605 
606   *TotalItems = 0;
607 
608   if(buffer == NULL || buffer[0] == NULLB)
609       return NULL;
610 
611   MemSet(TmpBuff, '\0', sizeof(TmpBuff));
612 
613   FileLen = StringLen(buffer);
614 
615   for(i = 0; i < FileLen; i++) {
616 
617     if(NumInvalid > 10) {
618       printf("<b>ERROR  :</b> Too many invalid Gi/Accession numbers <BR>");
619       return NULL;
620     }
621 
622     if(isspace(buffer[i])) /* Rolling spaces */
623       continue;
624 
625     j= 0;
626     while (!isspace(buffer[i]) && j < 10 && i < FileLen) {
627       TmpBuff[j] = buffer[i];
628       j++; i++;
629     }
630     TmpBuff[j] = NULLB;
631 
632     /* Now validating accession/gi */
633 
634     for(k =0; k < j; k++) {
635       if(!isdigit(TmpBuff[k])) {
636         break;
637       }
638     }
639     if(k != j) {
640         if(IS_NOT_accession(TmpBuff)){
641             printf("<b>WARNING:</b> Gi/Accession \"%s\" "
642                    "is not valid<BR>",
643                    TmpBuff);
644             NumInvalid++;
645             continue;
646         }
647     }
648 
649     /* It we come here - we got valid text ID */
650 
651     if(acclist == NULL) { /* first element */
652         acclist = (AccListPtr) MemNew(sizeof(AccList));
653         acclistTmp = acclist;
654         acclistTmp->acc = StringSave(TmpBuff);
655         acclistTmp->next = NULL;
656         acclistlast=acclistTmp;
657         *TotalItems = *TotalItems +1;
658     } else {
659         acclistTmp =  (AccListPtr) MemNew(sizeof(AccList));
660         acclistlast->next = acclistTmp;
661         acclistTmp->acc = StringSave(TmpBuff);
662         acclistTmp->next = NULL;
663         acclistlast = acclistTmp;
664         *TotalItems = *TotalItems +1;
665     }
666   }
667   return acclist;
668 }
669 
/*
 * Writes `text` to stdout with the characters that are special in HTML
 * (&, <, > and the double quote) replaced by character entities.
 * NULL prints nothing.
 */
static void PrintHtmlEscaped(CharPtr text)
{
  if(text == NULL)
    return;

  for(; *text != NULLB; text++) {
    switch(*text) {
    case '&':
      printf("&amp;");
      break;
    case '<':
      printf("&lt;");
      break;
    case '>':
      printf("&gt;");
      break;
    case '"':
      printf("&quot;");
      break;
    default:
      putchar(*text);
      break;
    }
  }
}

/*
 * Sends the e-PCR query form as a complete CGI response (HTTP header
 * included) to stdout.
 *
 * WWWSequence - text used to pre-fill the input area, may be NULL
 * NetscapeOK  - TRUE adds the file-upload field and makes the form use
 *               the multipart/form-data encoding
 *
 * Terminates the process with exit(1) when the organism table cannot be
 * obtained.
 *
 * Changes against the previous version:
 *  - a missing SERVER_PORT environment variable no longer passes NULL to
 *    a "%s" conversion; port 80 is substituted;
 *  - WWWSequence is HTML-escaped before it is placed inside the
 *    <textarea>, so that text such as "</textarea>" cannot break out of
 *    the input field.
 */
static void WWWSendSTSPage(CharPtr WWWSequence,
                           Boolean NetscapeOK)
{
  Int4 i;
  STSOrgPtr PNTR OrgTable;
  CharPtr server_port;
  CharPtr script_name;

  if((server_port = getenv("SERVER_PORT")) == NULL)
    server_port = "80";

  if((script_name = getenv("SCRIPT_NAME")) == NULL)
    script_name = "NOT_SET";

  printf("Content-type: text/html\r\n\r\n");
  printf("<HTML>"
         "<BODY bgcolor=\"%s\">"
         "<TITLE> STS Match Program </TITLE>\n"
         "<map name=TitleMap>\n"
         "<area shape=rect coords=3,1,43,21 "
         "href=http://www.ncbi.nlm.nih.gov>\n"
         "<area shape=rect coords=381,1,426,21 "
         "href=http://www.ncbi.nlm.nih.gov:%s/dbSTS/> "
         "<area shape=rect coords=431,1,476,21 "
         "href=http://www.ncbi.nlm.nih.gov/Entrez> "
         "<area shape=rect coords=481,1,500,21 "
         "href=/STS/About_ePCR.html>"
         "</map>\n"
         "<IMG SRC=\"/STS/pcr.gif\" BORDER=0 USEMAP=#TitleMap "
         "HEIGHT=22 WIDTH=500>\n", BG_COLOR, server_port);

  if((OrgTable = STSGetOrgTable()) == NULL) {
    printf("ERROR: Cannot initiate Organism index "
           "Exiting...\n");
    exit(1);
  }

  printf("<FORM ACTION=\"%s/result \" METHOD=\"POST\" "
         "NAME=\"STSTOOL\" %s>\n",
         script_name,
         NetscapeOK? "ENCTYPE=\"multipart/form-data\"" : "");

  printf("PCR-based sequence tagged sites (STSs) "
         "have been used as landmarks for "
         "construction of various types of genomic maps. "
         "Using \"electronic PCR\" (e-PCR), "
         "these sites can be detected in DNA sequences, "
         "potentially allowing their map "
         "locations to be determined. <BR><BR>\n");

  /* Input type selector and the input area with its optional content */

  printf("Enter here your input data as \n"
         "<select name = \"INPUT_TYPE\"> \n"
         "<option> Sequence in FASTA format \n"
         "<option> Accession or GI \n"
         "</select><BR><BR> \n"
         "<textarea name=\"SEQUENCE\" rows=6 cols=60>");
  PrintHtmlEscaped(WWWSequence);
  printf("</textarea> \n");

  if(NetscapeOK)
    printf("<BR>Or load your input data from file: "
           "<INPUT TYPE=\"file\" NAME=\"SEQFILE\"> \n");

  printf("<BR><BR>\n");

  /* Organism selector: one option per entry of the organism table */

  printf("Retrieve STS from &nbsp;"
         "<select name = \"ORGANISM\"> \n");

  printf(" <option> All Organisms ");
  for (i=0; i < MAXORGNUM && OrgTable[i] != NULL; i++) {
    printf(" <option> %s (%d) \n",
           OrgTable[i]->string, OrgTable[i]->num);
  }

  printf("</select><BR><BR>\n");

  printf("Print detailed information "
         "<INPUT TYPE=\"checkbox\" NAME=\"DETAILED\" CHECKED><BR><BR>\n");

  printf("<INPUT TYPE=\"submit\"> \n"
         "<INPUT TYPE=\"reset\" VALUE=\"Clear input\"> \n"
         "<HR></FORM>\n"
         );

  /* Page footer */

  printf("%c<ADDRESS>\n", LF);
  printf("Comments and suggestions to:"
         "&lt; \n<a href=\"mailto:info@ncbi.nlm.nih.gov\">"
         "info@ncbi.nlm.nih.gov\n"
         "</a> &gt; <BR> Credits to: \n"
         "<a href=\"mailto:shavirin@ncbi.nlm.nih.gov\">"
         "Sergei B. Shavirin</a>, \n"
         "<!-- <a href=\"http://www.ncbi.nlm.nih.gov/STS/shavirin.html\">"
         "Sergei B. Shavirin</a> -->\n"
         "<a href=\"mailto:schuler@ncbi.nlm.nih.gov\">"
         "Greg Schuler</a> and \n"
         "<a href=\"mailto:carolyn@ncbi.nlm.nih.gov\">"
         "Carolyn Tolstoshev</a>\n");
  printf("</ADDRESS>\n");
}
762 #define NUMARGS 9
763 
764 Args pcr_args[NUMARGS] = {
765   {"Input type\n"
766    "         0 - FASTA file \n"
767    "         1 - List of Gi/Accession numbers",
768    NULL, NULL,NULL,FALSE,'t',ARG_INT,0.0,0,NULL},
769   {"Format of output  \n"
770    "         0 - Text \n"
771    "         1 - HTML   ",
772    "0", NULL,NULL,FALSE,'f',ARG_INT, 0.0,0,NULL},
773   { "File with FASTA entries or GI/Accession numbers",
774     "stdin", NULL, NULL, TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
775   {"Print detailed information \n"
776    "         0 - Short         \n"
777    "         1 - Detailed        ",
778    "0", NULL,NULL,TRUE,'d',ARG_INT, 0.0,0,NULL},
779   { "GI/Accession number for quick look",
780     NULL, NULL, NULL, TRUE, 'u', ARG_STRING, 0.0, 0, NULL},
781   {"STS database file name:",
782    NULL, NULL,NULL,TRUE,'s',ARG_FILE_IN, 0.0,0,NULL},
783   {"ORG database file name:",
784    NULL, NULL,NULL,TRUE,'o',ARG_FILE_IN, 0.0,0,NULL},
785   {"MAP database file name:",
786    NULL, NULL,NULL,TRUE,'m',ARG_FILE_IN, 0.0,0,NULL},
787   {"Logfile name:",
788    "elecpcr.log", NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL}
789 };
790 
STSReadCommandLine(void)791 static StsParPtr STSReadCommandLine(void)
792 {
793   StsParPtr stsp;
794   FILE *fd;
795 
796   if((stsp = StsParNew()) == NULL)
797       return NULL;
798 
799   if ( !GetArgs ("elecpcr", NUMARGS, pcr_args) ) {
800       exit(1);
801   }
802   if (!ErrSetLog (pcr_args[8].strvalue)) {
803     ErrShow();
804   } else {
805     ErrSetOpts (ERR_CONTINUE, ERR_LOG_ON);
806   }
807 
808   stsp->sts_db_name = StringSave(pcr_args[5].strvalue);
809   stsp->org_db_name = StringSave(pcr_args[6].strvalue);
810   stsp->map_db_name = StringSave(pcr_args[7].strvalue);
811 
812   if(pcr_args[1].intvalue == 1)
813     stsp->html = TRUE;
814   else
815     stsp->html = FALSE;
816 
817   if(pcr_args[3].intvalue == 1)
818     stsp->detailed = TRUE;
819   else
820     stsp->detailed = FALSE;
821 
822   if(pcr_args[0].intvalue == 1)
823     stsp->intype = ACC_IN;
824   else
825     stsp->intype = FASTA_IN;
826 
827   if(pcr_args[4].strvalue != NULL) {   /* Quick look ? */
828     stsp->sequence = pcr_args[4].strvalue;
829   } else {  /* Reading from file by default */
830     if((fd = FileOpen(pcr_args[2].strvalue, "rb")) == NULL)
831       return NULL;
832     if((stsp->sequence = WWWReadFileInMemory(fd, 0, TRUE)) == NULL)
833       return NULL;
834   }
835   return stsp;
836 }
837 
838 
GetStsSearchParam(void)839 static StsParPtr GetStsSearchParam(void)
840 {
841 
842   WWWInfoPtr info;
843   CharPtr chptr;
844   StsParPtr stsp;
845   FILE *fd;
846   time_t time_now;
847   CharPtr TimeNowStr;
848   WWWErrorCode error;
849   Boolean SequenceOK = FALSE;
850 
851   if((fd = FileOpen(LogFile, "a")) == NULL) {
852       if((fd = FileOpen("/tmp/wwwsts.log", "a")) == NULL) {
853           printf("Cannot open logfile. Exiting...\n ");
854           exit(1);
855       }
856   }
857 
858   if((error = WWWReadPosting(&info)) != WWWErrOk)
859       return NULL;
860 
861   /* First check type of call to the program */
862 
863   if(WWWGetMethod(info) == COMMAND_LINE) {
864 
865     /* Reading command line and create StsPar structure */
866       WWWInfoFree(info);
867       return(STSReadCommandLine());
868   }
869 
870   if(WWWGetMethod(info) == WWW_GET) {
871       if(WWWGetNumEntries(info) == 0) {
872           time_now = time(NULL);
873           TimeNowStr = ctime(&time_now);
874           TimeNowStr[24] = '\0';
875           fprintf(fd, "\n%s|%s|%s|%s|%d",
876                   TimeNowStr, WWWGetAddress(info),
877                   WWWGetHost(info), WWWGetAgent(info),
878                   0);
879           fclose(fd);
880           WWWSendSTSPage(NULL, (Boolean)(WWWGetBrowser(info) == NETSCAPE));
881           exit(1);
882       } else { /* Here is processing of link to Electronic PCR */
883 
884           if((stsp = StsParNew()) == NULL)
885               return NULL;
886 
887           /* Sequence Accession or GI */
888 
889           stsp->intype = ACC_IN;
890 
891           if((chptr = WWWGetValueByName(info, "ID")) != NULL) {
892               stsp->sequence = StringSave(chptr);
893           } else {
894               time_now = time(NULL);
895               TimeNowStr = ctime(&time_now);
896               TimeNowStr[24] = '\0';
897               fprintf(fd, "\n%s|%s|%s|%s|%d",
898                       TimeNowStr, WWWGetAddress(info),
899                       WWWGetHost(info), WWWGetAgent(info),
900                       0);
901               fclose(fd);
902               WWWSendSTSPage(NULL, (Boolean)(WWWGetBrowser(info) == NETSCAPE));
903               exit(1);
904           }
905 
906           /* Detailed or short */
907 
908           if((chptr = WWWGetValueByName(info, "D")) != NULL) {
909               if(!StringICmp(chptr, "0") ||
910                  !StringICmp(chptr, "FALSE") ||
911                  !StringICmp(chptr, "OFF") ||
912                  !StringICmp(chptr, "NO") )
913                   stsp->detailed = FALSE;
914           } else {
915               stsp->detailed = TRUE;
916           }
917 
918           /* Text or HTML */
919 
920           if((chptr = WWWGetValueByName(info, "F")) != NULL) {
921               if(!StringICmp(chptr, "0") ||
922                  !StringICmp(chptr, "FALSE") ||
923                  !StringICmp(chptr, "OFF") ||
924                  !StringICmp(chptr, "TEXT") ||
925                  !StringICmp(chptr, "NO") ) {
926                   stsp->html = FALSE;
927                   printf("Content-type: text/html\r\n\r\n");
928                   printf("<BODY bgcolor=\"%s\"><PRE>", BG_COLOR);
929                   printf("<TITLE> STS Match Program </TITLE>");
930               }
931           } else {
932               stsp->html = TRUE;
933           }
934           return stsp;
935 
936       }  /* Link to Electronic PCR proccesing */
937   }  /* Method == GET */
938 
939   if((stsp = StsParNew()) == NULL)
940     return NULL;
941 
942 #ifdef TEST
943   printf("Content-type: text/html\r\n\r\n");
944   info_data = (WWWInfoDataPtr) info;
945   for(i=0; i < info_data->num_entries; i++) {
946     printf("%s : %s\n <BR>%c",
947            info_data->entries[i]->name,
948            info_data->entries[i]->val, LF);
949   }
950   exit(1);
951 #endif
952 
953   if((chptr = WWWGetValueByName(info, "ORGANISM")) != NULL)
954       stsp->orgname = StringSave(chptr);
955 
956   if((chptr = WWWGetValueByName(info, "SEQUENCE")) != NULL) {
957     if(chptr[0] != NULLB)
958       SequenceOK = TRUE;
959     stsp->sequence = StringSave(chptr);
960   }
961   if(((chptr = WWWGetValueByName(info, "SEQFILE")) != NULL) &&
962      !SequenceOK) {
963     stsp->sequence = StringSave(chptr);
964   }
965 
966   if((chptr = WWWGetValueByName(info, "INPUT_TYPE")) != NULL) {
967     if(StringStr(chptr, "Sequence")) {
968       stsp->intype = FASTA_IN;
969     } else if(StringStr(chptr, "Accession")) {
970       stsp->intype = ACC_IN;
971     }
972   }
973 
974   if((chptr = WWWGetValueByName(info, "DETAILED")) != NULL) {
975     stsp->detailed = TRUE;
976   }
977 
978   time_now = time(NULL);
979   TimeNowStr = ctime(&time_now);
980   TimeNowStr[24] = '\0';
981   fprintf(fd, "\n%s|%s|%s|%s|%d|%d|%d",
982           TimeNowStr, WWWGetAddress(info),
983           WWWGetHost(info), WWWGetAgent(info),
984           stsp->intype,
985           stsp->organism,
986           stsp->detailed);
987   fclose(fd);
988 
989   WWWInfoFree(info);
990   return stsp;
991 }
StsParNew(void)992 static StsParPtr StsParNew(void)
993 {
994     StsParPtr param;
995     param = (StsParPtr) MemNew(sizeof(StsPar));
996     param->sequence = NULL;
997     param->detailed = FALSE;
998     param->intype = FASTA_IN;
999     param->html = TRUE;
1000 
1001     return param;
1002 }
1003 
/* Releases an STS search parameter block together with the strings it
 * owns (sequence, the three database names and the organism name).
 * Passing NULL is allowed and does nothing. */
static void StsParFree(StsParPtr param) {
    if (param == NULL)
        return;

    MemFree(param->sequence);
    MemFree(param->sts_db_name);
    MemFree(param->org_db_name);
    MemFree(param->map_db_name);
    MemFree(param->orgname);

    MemFree(param);
}
1013 
NextFastaFromBuff(CharPtr p,CharPtr PNTR next)1014 static FastaSeqPtr NextFastaFromBuff(CharPtr p, CharPtr PNTR next)
1015 {
1016   FastaSeqPtr fseq;
1017   CharPtr t;
1018   CharPtr str;
1019   Int4 i, SeqSize = INIT_BUFF_SIZE;
1020 
1021   *next = NULL;
1022 
1023   if(p == NULL | p[0] == NULLB)
1024     return NULL;
1025 
1026   t = p;
1027 
1028   while (isspace(*t)) /* Rolling spaces */
1029     *t++;
1030 
1031   fseq = (FastaSeqPtr) MemNew(sizeof(FastaSeq));
1032   fseq->label = NULL;
1033   fseq->seq = NULL;
1034 
1035   switch (*t) {
1036   case NULLB:
1037     return NULL;
1038   case '>':
1039     /* Reading label */
1040     str = (CharPtr) MemNew(SeqSize + 5);
1041     str[0] = NULLB;
1042     for(i=0; *t != NULLB; i++) {
1043       if((str[i] = *t) == '\n' || *t == '\r')
1044         break;
1045       if (i > SeqSize) {
1046         SeqSize = i + INIT_BUFF_SIZE;
1047         str = (CharPtr) Realloc(str, SeqSize + 5);
1048       }
1049       *t++;
1050     }
1051     str[i] = NULLB;
1052     fseq->label = StringSave(str);
1053     MemFree(str);
1054 
1055     /* Reading sequence */
1056 
1057     while (isspace(*t)) /* Rolling spaces */
1058       *t++;
1059     str = (CharPtr) MemNew(SeqSize + 5);
1060     str[0] = NULLB;
1061 
1062     for(i=0; *t != NULLB; i++) {
1063       if((str[i] = *t) == '>') {
1064         *next = t;
1065         break;
1066       }
1067 
1068       if (i > SeqSize) {
1069         SeqSize = i + INIT_BUFF_SIZE;
1070         str = (CharPtr) Realloc(str, SeqSize + 5);
1071       }
1072       *t++;
1073     }
1074     str[i] = NULLB;
1075     fseq->seq = StringSave(str);
1076     MemFree(str);
1077     break;
1078   default:
1079 
1080     /* Reading sequence */
1081 
1082     while (isspace(*t)) /* Rolling spaces */
1083       *t++;
1084     str = (CharPtr) MemNew(SeqSize + 5);
1085     str[0] = NULLB;
1086 
1087     for(i=0; *t != NULLB; i++) {
1088       if((str[i] = *t) == '>') {
1089         *next = t;
1090         break;
1091       }
1092       if (i > SeqSize) {
1093         SeqSize = i + INIT_BUFF_SIZE;
1094         str = (CharPtr) Realloc(str, SeqSize + 5);
1095       }
1096       *t++;
1097     }
1098     str[i] = NULLB;
1099     fseq->seq = StringSave(str);
1100     MemFree(str);
1101     break;
1102   }
1103   if(fseq->label == NULL)
1104     fseq->label = StringSave(">Your sequence");
1105   return fseq;
1106 }
1107 
FilterNucSequence(CharPtr p)1108 static Int4 FilterNucSequence(CharPtr p)
1109 {
1110   CharPtr t, s;
1111   Int4 len, i =0;
1112   Int4 NumBadChar=0;
1113   len = StringLen(p);
1114 
1115   s = p;
1116 
1117   for(t = p; i < len; i++){
1118     *p = toupper(*p);
1119     if (isalpha(*p)) {
1120       *t = *p;
1121       *t++;
1122     } else {
1123       if(!isspace(*p)) {
1124         NumBadChar++;
1125       }
1126     }
1127     *p++;
1128   }
1129   *t = NULLB;
1130   p = s;
1131   return NumBadChar;
1132 }
1133