1 /* @source fuzztran application
2 **
3 ** Finds fuzzy protein patterns in nucleic acid sequences
4 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
5 **
6 ** Last modified by Jon Ison on Wed Jun 28 14:04:18 BST 2006
7 ** @@
8 **
9 ** This program is free software; you can redistribute it and/or
10 ** modify it under the terms of the GNU General Public License
11 ** as published by the Free Software Foundation; either version 2
12 ** of the License, or (at your option) any later version.
13 **
14 ** This program is distributed in the hope that it will be useful,
15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ** GNU General Public License for more details.
18 **
19 ** You should have received a copy of the GNU General Public License
20 ** along with this program; if not, write to the Free Software
21 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 ******************************************************************************/
23
24 #include "emboss.h"
25
26
27 static void fuzztran_SourceFeature(const AjPFeattable thys, const AjPSeq pseq,
28 ajint start, ajint frame,
29 AjPFeattable sourcetab);
30
31
32
33
34 /* @prog fuzztran *************************************************************
35 **
36 ** Protein pattern search after translation
37 **
38 ******************************************************************************/
39
main(int argc,char ** argv)40 int main(int argc, char **argv)
41 {
42 AjPSeqall seqall = NULL;
43 AjPSeq seq = NULL;
44 AjPFeattable tab = NULL;
45 AjPFeattable seqtab = NULL;
46 AjPReport report = NULL;
47 AjPPatlistSeq plist = NULL;
48 AjBool writeok = ajTrue;
49
50 AjPSeq pseq = NULL;
51
52 AjPStr gcode;
53 AjPTrn trantable;
54 AjPStr frame;
55 AjPStr pro = NULL;
56 ajint table;
57 ajint frameno;
58
59 AjPStr text = NULL;
60
61 ajint begin;
62 ajint end;
63
64 AjPStr tmpstr = NULL;
65
66 embInit("fuzztran", argc, argv);
67
68 seqall = ajAcdGetSeqall("sequence");
69 plist = ajAcdGetPattern("pattern");
70 report = ajAcdGetReport("outfile");
71 gcode = ajAcdGetListSingle("table");
72 frame = ajAcdGetListSingle("frame");
73
74 ajPatlistSeqDoc(plist, &tmpstr);
75 ajFmtPrintAppS(&tmpstr, "TransTable: %S\n", ajAcdGetValue("table"));
76 ajFmtPrintAppS(&tmpstr, "Frames: %S\n", ajAcdGetValue("frame"));
77 ajReportSetHeaderS(report, tmpstr);
78
79 ajStrToInt(gcode,&table);
80
81 trantable = ajTrnNewI(table);
82
83 writeok=ajTrue;
84 while(writeok && ajSeqallNext(seqall,&seq))
85 {
86 begin = ajSeqallGetseqBegin(seqall);
87 end = ajSeqallGetseqEnd(seqall);
88 ajStrAssignSubC(&text,ajSeqGetSeqC(seq),begin-1,end-1);
89 ajStrFmtUpper(&text);
90 seqtab = ajFeattableNewDna(ajSeqGetNameS(seq));
91
92 if(ajStrMatchCaseC(frame,"F") || ajStrMatchCaseC(frame,"6"))
93 {
94 tab = ajFeattableNewProt(ajSeqGetNameS(seq));
95 ajStrAssignC(&pro,"");
96 ajTrnSeqFrameS(trantable,text,1,&pro);
97 pseq = ajSeqNewNameS(pro, ajSeqGetNameS(seq));
98 embPatlistSeqSearchAll(tab,pseq,plist,ajFalse);
99 fuzztran_SourceFeature(tab, pseq, begin, 1, seqtab);
100 ajFeattableDel(&tab);
101 ajSeqDel(&pseq);
102
103 tab = ajFeattableNewProt(ajSeqGetNameS(seq));
104 ajStrAssignC(&pro,"");
105 ajTrnSeqFrameS(trantable,text,2,&pro);
106 pseq = ajSeqNewNameS(pro, ajSeqGetNameS(seq));
107 embPatlistSeqSearchAll(tab,pseq,plist,ajFalse);
108 fuzztran_SourceFeature(tab, pseq, begin, 2, seqtab);
109 ajFeattableDel(&tab);
110 ajSeqDel(&pseq);
111
112 tab = ajFeattableNewProt(ajSeqGetNameS(seq));
113 ajStrAssignC(&pro,"");
114 ajTrnSeqFrameS(trantable,text,3,&pro);
115 pseq = ajSeqNewNameS(pro, ajSeqGetNameS(seq));
116 embPatlistSeqSearchAll(tab,pseq,plist,ajFalse);
117 fuzztran_SourceFeature(tab, pseq, begin, 3, seqtab);
118 ajFeattableDel(&tab);
119 ajSeqDel(&pseq);
120 }
121 if(ajStrMatchCaseC(frame,"R") || ajStrMatchCaseC(frame,"6"))
122 {
123 tab = ajFeattableNewProt(ajSeqGetNameS(seq));
124 ajStrAssignC(&pro,"");
125 ajTrnSeqFrameS(trantable,text,-1,&pro);
126 pseq = ajSeqNewNameS(pro, ajSeqGetNameS(seq));
127 embPatlistSeqSearchAll(tab,pseq,plist,ajFalse);
128 fuzztran_SourceFeature(tab, pseq, begin, -1, seqtab);
129 ajFeattableDel(&tab);
130 ajSeqDel(&pseq);
131
132 tab = ajFeattableNewProt(ajSeqGetNameS(seq));
133 ajStrAssignC(&pro,"");
134 ajTrnSeqFrameS(trantable,text,-2,&pro);
135 pseq = ajSeqNewNameS(pro, ajSeqGetNameS(seq));
136 embPatlistSeqSearchAll(tab,pseq,plist,ajFalse);
137 fuzztran_SourceFeature(tab, pseq, begin, -2, seqtab);
138 ajFeattableDel(&tab);
139 ajSeqDel(&pseq);
140
141 tab = ajFeattableNewProt(ajSeqGetNameS(seq));
142 ajStrAssignC(&pro,"");
143 ajTrnSeqFrameS(trantable,text,-3,&pro);
144 pseq = ajSeqNewNameS(pro, ajSeqGetNameS(seq));
145 embPatlistSeqSearchAll(tab,pseq,plist,ajFalse);
146 fuzztran_SourceFeature(tab, pseq, end, -3, seqtab);
147 ajFeattableDel(&tab);
148 ajSeqDel(&pseq);
149 }
150 ajStrToInt(frame,&frameno);
151 if(frameno != 0 && frameno != 6)
152 {
153 tab = ajFeattableNewProt(ajSeqGetNameS(seq));
154 ajStrAssignC(&pro,"");
155 ajTrnSeqFrameS(trantable,text,frameno,&pro);
156 pseq = ajSeqNewNameS(pro, ajSeqGetNameS(seq));
157 embPatlistSeqSearchAll(tab,pseq,plist,ajFalse);
158 fuzztran_SourceFeature(tab, pseq, begin, frameno, seqtab);
159 ajFeattableDel(&tab);
160 ajSeqDel(&pseq);
161 }
162
163 if(ajFeattableGetSize(seqtab))
164 writeok = ajReportWrite(report, seqtab, seq);
165 ajStrDel(&text);
166 }
167 ajReportSetSeqstats(report, seqall);
168
169 ajStrDel(&pro);
170 ajStrDel(&text);
171 ajStrDel(&tmpstr);
172
173 ajStrDel(&gcode);
174 ajStrDel(&frame);
175
176 ajSeqallDel(&seqall);
177 ajSeqDel(&seq);
178 ajSeqDel(&pseq);
179
180 ajReportClose(report);
181 ajReportDel(&report);
182 ajPatlistSeqDel(&plist);
183 ajFeattableDel(&seqtab);
184 ajFeattableDel(&tab);
185
186 ajTrnDel(&trantable);
187 embExit();
188
189 return 0;
190 }
191
192
193
194
195 /* @funcstatic fuzztran_SourceFeature *****************************************
196 **
197 ** Creates a dna source feature matching the all features in a
198 ** protein fetaure table.
199 **
200 ** @param [r] thys [const AjPFeattable] Protein feature table object
201 ** @param [r] pseq [const AjPSeq] Protein sequence object
202 ** @param [r] start [ajint] Start position in source sequence
203 ** (base position of start of first codon,
204 ** counted from the start of the sequence)
205 ** @param [r] frame [ajint] Reading frame 1, 2 or 3 for forward and
206 ** -1 -2 or -3 for reverse
207 ** @param [u] sourcetab [AjPFeattable] Protein feature table object
208 ** @return [void]
209 ** @@
210 ******************************************************************************/
211
fuzztran_SourceFeature(const AjPFeattable thys,const AjPSeq pseq,ajint start,ajint frame,AjPFeattable sourcetab)212 static void fuzztran_SourceFeature(const AjPFeattable thys, const AjPSeq pseq,
213 ajint start, ajint frame,
214 AjPFeattable sourcetab)
215 {
216 AjIList iter=NULL;
217 AjPFeature protfeature = NULL;
218 AjPFeature sourcefeature = NULL;
219 char strand;
220 ajint framenum;
221 AjBool rev;
222 ajint begin = 0;
223 ajint end = 0;
224 AjPStr fthit = NULL;
225 AjPStr s = NULL;
226 AjPStr t = NULL;
227 AjPStr v = NULL;
228
229 ajStrAssignC(&fthit, "hit");
230
231 if(frame > 0)
232 {
233 strand = '+';
234 framenum = frame;
235 rev = ajFalse;
236 }
237 else
238 {
239 strand = '-';
240 framenum = -frame;
241 rev = ajTrue;
242 }
243
244 if(thys->Features)
245 {
246 iter = ajListIterNew(thys->Features) ;
247 while(!ajListIterDone(iter))
248 {
249 protfeature = (AjPFeature)ajListIterGet(iter);
250 if(rev)
251 {
252 begin = start - framenum
253 - 3*protfeature->Start + 4;
254 end = start - framenum
255 - 3*protfeature->End + 2;
256 }
257 else
258 {
259 begin = start + framenum
260 + 3*protfeature->Start - 4;
261 end = start + framenum
262 + 3*protfeature->End - 2;
263 }
264 sourcefeature = ajFeatNew(sourcetab, NULL, fthit, begin, end,
265 ajFeatGetScore(protfeature),
266 strand, framenum);
267 if(ajFeatGetNoteC(protfeature, "pat", &v))
268 {
269 ajFmtPrintS(&s, "*pat %S", v);
270 ajFeatTagAddSS(sourcefeature, NULL, s);
271 }
272 if(ajFeatGetNoteC(protfeature, "mismatch", &v))
273 {
274 ajFmtPrintS(&s, "*mismatch %S", v);
275 ajFeatTagAddSS(sourcefeature, NULL, s);
276 }
277 ajFmtPrintS(&s, "*frame %d", framenum);
278 ajFeatTagAddSS(sourcefeature, NULL, s);
279
280 ajFmtPrintS(&s, "*start %d", ajFeatGetStart(protfeature));
281 ajFeatTagAddSS(sourcefeature, NULL, s);
282
283 ajFmtPrintS(&s, "*end %d", ajFeatGetEnd(protfeature));
284 ajFeatTagAddSS(sourcefeature, NULL, s);
285
286 ajStrAssignSubS(&t,ajSeqGetSeqS(pseq),
287 ajFeatGetStart(protfeature)-1,
288 ajFeatGetEnd(protfeature)-1);
289 ajFmtPrintS(&s, "*translation %S", t);
290 ajFeatTagAddSS(sourcefeature, NULL, s);
291
292 }
293 ajListIterDel(&iter) ;
294 }
295
296 ajStrDel(&fthit);
297 ajStrDel(&s);
298 ajStrDel(&t);
299 ajStrDel(&v);
300
301 return;
302 }
303