1 /**************************************************************************
2 * *
3 * COPYRIGHT NOTICE *
4 * *
5 * This software/database is categorized as "United States Government *
6 * Work" under the terms of the United States Copyright Act. It was *
7 * produced as part of the author's official duties as a Government *
8 * employee and thus can not be copyrighted. This software/database is *
9 * freely available to the public for use without a copyright notice. *
10 * Restrictions can not be placed on its present or future use. *
11 * *
12 * Although all reasonable efforts have been taken to ensure the accuracy *
13 * and reliability of the software and data, the National Library of *
14 * Medicine (NLM) and the U.S. Government do not and can not warrant the *
15 * performance or results that may be obtained by using this software, *
16 * data, or derivative works thereof. The NLM and the U.S. Government *
17 * disclaim any and all warranties, expressed or implied, as to the *
18 * performance, merchantability or fitness for any particular purpose or *
19 * use. *
20 * *
21 * In any work or product derived from this material, proper attribution *
22 * of the author(s) as the source of the software or data would be *
23 * appreciated. *
24 * *
25 **************************************************************************/
26 /* Author Hugues Sicotte 8/14/99
27 Stand-alone program to dust one sequence.
28 future improvements are to take a whole fasta library as input.
29 */
30 #include <ncbi.h>
31 #include <objseq.h>
32 #include <objsset.h>
33 #include <sequtil.h>
34 #include <seqport.h>
35 #include <tofasta.h>
36 #include <blast.h>
37 #include <blastpri.h>
38 #include <txalign.h>
39 #include <objloc.h>
40 #include <dust.h>
41
42
43 #define NUMARG 6
44
45 static Args myargs [NUMARG] = {
46 { "sequence",
47 NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
48 { "Lowercase Masked letters (default is N)",
49 "F", NULL, NULL, FALSE, 'l', ARG_BOOLEAN, 0.0, 0, NULL},
50 { "is nucleotide)",
51 "T", NULL, NULL, FALSE, 'N', ARG_BOOLEAN, 0.0, 0, NULL},
52 { "level",
53 "20", NULL, NULL, FALSE, 'L', ARG_INT, 0.0, 0, NULL},
54 { "window size",
55 "16", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL},
56 { "Number of iterations of dusting (for timing purposes)",
57 "1", NULL, NULL, FALSE, 'I', ARG_INT, 0.0, 0, NULL}
58
59 };
60
Main(void)61 Int2 Main (void)
62
63 {
64 CharPtr inputfile;
65 FILE* infp, *outfp;
66 Boolean is_na,UseN;
67 BioseqPtr query;
68 SeqEntryPtr sep;
69 Int4 loop,i,start,stop,last;
70 SeqLocPtr slp,slp_dust;
71 Char c;
72 SeqPortPtr spp;
73 Int2 level,window;
74
75 if (! GetArgs ("testdust", NUMARG, myargs))
76 {
77 return (1);
78 }
79
80 if (! SeqEntryLoad())
81 return 1;
82
83 ErrSetMessageLevel(SEV_WARNING);
84
85 inputfile = myargs [0].strvalue;
86
87 if ((infp = FileOpen(inputfile, "r")) == NULL)
88 {
89 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", inputfile);
90 return (1);
91 }
92 is_na = (Boolean) myargs [2].intvalue;
93 UseN = (Boolean) myargs [1].intvalue;
94 level = (Int2) myargs[3].intvalue;
95 window = (Int2) myargs[4].intvalue;
96 loop = (Int2) myargs[5].intvalue;
97 sep = FastaToSeqEntry(infp, is_na);
98 FileClose(infp);
99 if (sep != NULL)
100 {
101 query = NULL;
102 if (is_na)
103 {
104 SeqEntryExplore(sep, &query, FindNuc);
105 }
106 else
107 {
108 SeqEntryExplore(sep, &query, FindProt);
109 }
110
111 if (query == NULL)
112 {
113 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
114 return 2;
115 }
116 }
117 for(i=0;i<loop;i++) {
118 slp_dust = BioseqDust (query, (Int4)0, query->length-1,
119 level, window, (Int2)5, (Int2) 1);
120 if(i!=loop-1 && slp_dust)
121 slp_dust = SeqLocFree(slp_dust);
122 }
123 outfp = stdout;
124 if(slp_dust==NULL) {
125 BioseqRawToFasta(query, outfp, is_na);
126 } else {
127 SeqEntrysToDefline(sep,outfp,is_na,TRUE);
128 start = SeqLocStart(slp_dust);
129 stop = SeqLocStop(slp_dust);
130 spp = SeqPortNew(query,0,query->length-1,Seq_strand_plus,Seq_code_iupacna);
131 last =-1;
132 slp=NULL;
133 last = -1;
134 while((slp=SeqLocFindNext(slp_dust,slp))!=NULL) {
135 start = SeqLocStart(slp);
136 stop = SeqLocStop(slp);
137 for(i=last+1;i<start;) {
138 c= SeqPortGetResidue(spp);
139 fprintf(outfp,"%c",c);
140 i++;
141 if(!(i%80))
142 fprintf(outfp,"\n");
143 }
144 for(i=start;i<=stop;) {
145 c= SeqPortGetResidue(spp);
146 if(UseN)
147 c= 'N';
148 else
149 c = TO_LOWER(c);
150 fprintf(outfp,"%c",c);
151 i++;
152 if(!(i%80))
153 fprintf(outfp,"\n");
154 }
155
156 last = stop;
157 }
158 for(i=stop+1;i<query->length;) {
159 c= SeqPortGetResidue(spp);
160 fprintf(outfp,"%c",c);
161 i++;
162 if(!(i%80))
163 fprintf(outfp,"\n");
164 }
165
166 if(i%80)
167 fprintf(outfp,"\n");
168 SeqPortFree(spp);
169 slp_dust = SeqLocFree(slp_dust);
170 }
171
172 SeqEntryFree(sep);
173
174 return 0;
175 }
176
177