1 /**************************************************************************
2 *                                                                         *
3 *                             COPYRIGHT NOTICE                            *
4 *                                                                         *
5 * This software/database is categorized as "United States Government      *
6 * Work" under the terms of the United States Copyright Act.  It was       *
7 * produced as part of the author's official duties as a Government        *
8 * employee and thus can not be copyrighted.  This software/database is    *
9 * freely available to the public for use without a copyright notice.      *
10 * Restrictions can not be placed on its present or future use.            *
11 *                                                                         *
12 * Although all reasonable efforts have been taken to ensure the accuracy  *
13 * and reliability of the software and data, the National Library of       *
14 * Medicine (NLM) and the U.S. Government do not and can not warrant the   *
15 * performance or results that may be obtained by using this software,     *
16 * data, or derivative works thereof.  The NLM and the U.S. Government     *
17 * disclaim any and all warranties, expressed or implied, as to the        *
18 * performance, merchantability or fitness for any particular purpose or   *
19 * use.                                                                    *
20 *                                                                         *
21 * In any work or product derived from this material, proper attribution   *
22 * of the author(s) as the source of the software or data would be         *
23 * appreciated.                                                            *
24 *                                                                         *
25 **************************************************************************/
26 /* Author Hugues Sicotte 8/14/99
27    Stand-alone program to dust one sequence.
28      future improvements are to take a whole fasta library as input.
29 */
30 #include <ncbi.h>
31 #include <objseq.h>
32 #include <objsset.h>
33 #include <sequtil.h>
34 #include <seqport.h>
35 #include <tofasta.h>
36 #include <blast.h>
37 #include <blastpri.h>
38 #include <txalign.h>
39 #include <objloc.h>
40 #include <dust.h>
41 
42 
43 #define NUMARG 6
44 
45 static Args myargs [NUMARG] = {
46   { "sequence",
47 	NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
48   { "Lowercase Masked letters (default is N)",
49         "F", NULL, NULL, FALSE, 'l', ARG_BOOLEAN, 0.0, 0, NULL},
50   { "is nucleotide)",
51         "T", NULL, NULL, FALSE, 'N', ARG_BOOLEAN, 0.0, 0, NULL},
52   { "level",
53         "20", NULL, NULL, FALSE, 'L', ARG_INT, 0.0, 0, NULL},
54   { "window size",
55         "16", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL},
56   { "Number of iterations of dusting (for timing purposes)",
57         "1", NULL, NULL, FALSE, 'I', ARG_INT, 0.0, 0, NULL}
58 
59 };
60 
Main(void)61 Int2 Main (void)
62 
63 {
64     CharPtr inputfile;
65     FILE* infp, *outfp;
66     Boolean is_na,UseN;
67     BioseqPtr query;
68     SeqEntryPtr sep;
69     Int4 loop,i,start,stop,last;
70     SeqLocPtr slp,slp_dust;
71     Char c;
72     SeqPortPtr spp;
73     Int2 level,window;
74 
75     if (! GetArgs ("testdust", NUMARG, myargs))
76         {
77             return (1);
78         }
79 
80     if (! SeqEntryLoad())
81         return 1;
82 
83     ErrSetMessageLevel(SEV_WARNING);
84 
85     inputfile = myargs [0].strvalue;
86 
87     if ((infp = FileOpen(inputfile, "r")) == NULL)
88 	{
89             ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", inputfile);
90             return (1);
91 	}
92     is_na = (Boolean) myargs [2].intvalue;
93     UseN = (Boolean) myargs [1].intvalue;
94     level = (Int2) myargs[3].intvalue;
95     window = (Int2) myargs[4].intvalue;
96     loop = (Int2) myargs[5].intvalue;
97     sep = FastaToSeqEntry(infp, is_na);
98     FileClose(infp);
99     if (sep != NULL)
100 	{
101             query = NULL;
102             if (is_na)
103 		{
104                     SeqEntryExplore(sep, &query, FindNuc);
105 		}
106             else
107 		{
108                     SeqEntryExplore(sep, &query, FindProt);
109 		}
110 
111 		if (query == NULL)
112 		{
113 	  	 	ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
114 	   		return 2;
115 		}
116 	}
117     for(i=0;i<loop;i++) {
118         slp_dust = BioseqDust (query, (Int4)0, query->length-1,
119                                level, window, (Int2)5, (Int2) 1);
120         if(i!=loop-1 && slp_dust)
121             slp_dust = SeqLocFree(slp_dust);
122     }
123     outfp = stdout;
124     if(slp_dust==NULL) {
125         BioseqRawToFasta(query, outfp, is_na);
126     } else {
127         SeqEntrysToDefline(sep,outfp,is_na,TRUE);
128         start = SeqLocStart(slp_dust);
129         stop = SeqLocStop(slp_dust);
130         spp = SeqPortNew(query,0,query->length-1,Seq_strand_plus,Seq_code_iupacna);
131         last =-1;
132         slp=NULL;
133         last = -1;
134         while((slp=SeqLocFindNext(slp_dust,slp))!=NULL) {
135             start = SeqLocStart(slp);
136             stop = SeqLocStop(slp);
137             for(i=last+1;i<start;) {
138                 c= SeqPortGetResidue(spp);
139                 fprintf(outfp,"%c",c);
140                 i++;
141                 if(!(i%80))
142                     fprintf(outfp,"\n");
143             }
144             for(i=start;i<=stop;) {
145                 c= SeqPortGetResidue(spp);
146                 if(UseN)
147                     c= 'N';
148                 else
149                     c = TO_LOWER(c);
150                 fprintf(outfp,"%c",c);
151                 i++;
152                 if(!(i%80))
153                     fprintf(outfp,"\n");
154             }
155 
156             last = stop;
157         }
158         for(i=stop+1;i<query->length;) {
159                 c= SeqPortGetResidue(spp);
160                 fprintf(outfp,"%c",c);
161                 i++;
162                 if(!(i%80))
163                     fprintf(outfp,"\n");
164         }
165 
166         if(i%80)
167             fprintf(outfp,"\n");
168         SeqPortFree(spp);
169         slp_dust = SeqLocFree(slp_dust);
170     }
171 
172     SeqEntryFree(sep);
173 
174     return 0;
175 }
176 
177