1 /* @source tfscan application
2 **
3 ** Finds transcription factors
4 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
5 ** @@
6 **
7 ** 12/03/2000: (AJB) Added accession numbers, end points and matching sequence
8 **
9 ** This program is free software; you can redistribute it and/or
10 ** modify it under the terms of the GNU General Public License
11 ** as published by the Free Software Foundation; either version 2
12 ** of the License, or (at your option) any later version.
13 **
14 ** This program is distributed in the hope that it will be useful,
15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ** GNU General Public License for more details.
18 **
19 ** You should have received a copy of the GNU General Public License
20 ** along with this program; if not, write to the Free Software
21 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 ******************************************************************************/
23
24 #include "emboss.h"
25
26
27
28
29 static void tfscan_print_hits(AjPList *l, ajint hits,
30 AjPReport outf,
31 const AjPTable t, const AjPSeq seq,
32 ajuint minlength,
33 const AjPTable btable);
34
35
36
37
38 /* @prog tfscan ***************************************************************
39 **
40 ** Scans DNA sequences for transcription factors
41 **
42 ******************************************************************************/
43
main(int argc,char ** argv)44 int main(int argc, char **argv)
45 {
46 AjPSeqall seqall;
47 AjPSeq seq = NULL;
48 AjPReport outf = NULL;
49 AjPFile inf = NULL;
50
51 ajint begin;
52 ajint end;
53
54 AjPList l = NULL;
55
56 AjPStr strand = NULL;
57 AjPStr substr = NULL;
58 AjPStr line = NULL;
59 AjPStr name = NULL;
60 AjPStr acc = NULL;
61 AjPStr bf = NULL;
62 AjPStr menu;
63 AjPStr pattern = NULL;
64 AjPStr opattern = NULL;
65 AjPStr pname = NULL;
66 AjPStr key = NULL;
67 AjPStr value = NULL;
68 AjPTable atable = NULL;
69 AjPTable btable = NULL;
70
71 ajint mismatch;
72 ajint minlength;
73
74 ajint sum;
75 ajint v;
76
77 char cp;
78 const char *p;
79
80
81 embInit("tfscan", argc, argv);
82
83 seqall = ajAcdGetSeqall("sequence");
84 outf = ajAcdGetReport("outfile");
85 mismatch = ajAcdGetInt("mismatch");
86 minlength = ajAcdGetInt("minlength");
87 menu = ajAcdGetListSingle("menu");
88
89 pname = ajStrNew();
90 cp=ajStrGetCharFirst(menu);
91
92 if(cp=='F')
93 ajStrAssignC(&pname,"tffungi");
94 else if(cp=='I')
95 ajStrAssignC(&pname,"tfinsect");
96 else if(cp=='O')
97 ajStrAssignC(&pname,"tfother");
98 else if(cp=='P')
99 ajStrAssignC(&pname,"tfplant");
100 else if(cp=='V')
101 ajStrAssignC(&pname,"tfvertebrate");
102 else if(cp=='C')
103 inf = ajAcdGetDatafile("custom");
104
105 if(cp!='C')
106 {
107 inf = ajDatafileNewInNameS(pname);
108 if(!inf)
109 ajFatal("Either EMBOSS_DATA undefined or TFEXTRACT needs running");
110 }
111
112 name = ajStrNew();
113 acc = ajStrNew();
114 bf = ajStrNewC("");
115 substr = ajStrNew();
116 line = ajStrNew();
117 pattern = ajStrNewC("AA");
118 opattern = ajStrNew();
119
120 while(ajSeqallNext(seqall, &seq))
121 {
122 begin=ajSeqallGetseqBegin(seqall);
123 end=ajSeqallGetseqEnd(seqall);
124 ajStrAssignC(&name,ajSeqGetNameC(seq));
125 strand=ajSeqGetSeqCopyS(seq);
126
127 ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1);
128 ajStrFmtUpper(&substr);
129
130 l=ajListNew();
131 atable = ajTablestrNew(1000);
132 btable = ajTablestrNew(1000);
133
134 sum=0;
135 while(ajReadlineTrim(inf,&line))
136 {
137 p = ajStrGetPtr(line);
138
139 if(!*p || *p=='#' || *p=='\n' || *p=='!')
140 continue;
141
142 ajFmtScanS(line,"%S%S%S",&pname,&pattern,&acc);
143 p += ajStrGetLen(pname);
144 while(*p && *p==' ')
145 ++p;
146 p += ajStrGetLen(pattern);
147 while(*p && *p==' ')
148 ++p;
149 p += ajStrGetLen(acc);
150 while(*p && *p==' ')
151 ++p;
152
153 ajStrAssignS(&opattern,pattern);
154 ajStrAssignC(&bf,p); /* rest of line */
155
156 v = embPatVariablePattern(pattern,substr,pname,l,0,
157 mismatch,begin);
158 if(v)
159 {
160 key = ajStrNewS(pname);
161 value = ajStrNewS(acc);
162 ajTablePut(atable,(void *)key,(void *)value);
163 key = ajStrNewS(pname);
164 value = ajStrNewS(bf);
165 ajTablePut(btable,(void *)key,(void *)value);
166 }
167 sum += v;
168 }
169
170 if(sum)
171 tfscan_print_hits(&l,sum,outf,atable,seq,minlength,
172 btable);
173
174 ajFileSeek(inf,0L,0);
175 ajListFree(&l);
176 ajTablestrFree(&atable);
177 ajTablestrFree(&btable);
178 ajStrDel(&strand);
179 }
180
181 ajStrDel(&line);
182 ajStrDel(&name);
183 ajStrDel(&acc);
184 ajStrDel(&pname);
185 ajStrDel(&opattern);
186 ajStrDel(&bf);
187 ajStrDel(&pattern);
188 ajStrDel(&substr);
189 ajSeqDel(&seq);
190 ajFileClose(&inf);
191 ajReportClose(outf);
192 ajReportDel(&outf);
193
194 ajSeqallDel(&seqall);
195 ajSeqDel(&seq);
196 ajStrDel(&menu);
197
198 embExit();
199
200 return 0;
201 }
202
203
204
205
206 /* @funcstatic tfscan_print_hits **********************************************
207 **
208 ** Print matches to transcription factor sites
209 **
210 ** @param [w] l [AjPList*] list of hits
211 ** @param [r] hits [ajint] number of hits
212 ** @param [u] outf [AjPReport] output report
213 ** @param [r] t [const AjPTable] table of accession numbers
214 ** @param [r] seq [const AjPSeq] test sequence
215 ** @param [r] minlength [ajuint] minimum length of pattern
216 ** @param [r] btable [const AjPTable] BF lines from transfac (if any)
217 ** @@
218 ******************************************************************************/
219
tfscan_print_hits(AjPList * l,ajint hits,AjPReport outf,const AjPTable t,const AjPSeq seq,ajuint minlength,const AjPTable btable)220 static void tfscan_print_hits(AjPList *l,
221 ajint hits, AjPReport outf,
222 const AjPTable t,
223 const AjPSeq seq, ajuint minlength,
224 const AjPTable btable)
225 {
226 ajint i;
227 EmbPMatMatch m;
228 const AjPStr acc = NULL;
229 const AjPStr bf = NULL;
230 AjPFeattable ftable = NULL;
231 AjPFeature gf = NULL;
232 AjPStr type = ajStrNewC("SO:0000235");
233 AjPStr tmpstr = NULL;
234
235 ftable = ajFeattableNewSeq(seq);
236
237 /*ajFmtPrintF(outf,"TFSCAN of %s from %d to %d\n\n",ajStrGetPtr(name),
238 begin,end);*/
239
240 for(i=0;i<hits;++i)
241 {
242 ajListPop(*l,(void **)&m);
243 acc = ajTableFetchS(t, m->seqname);
244
245 if(m->len >= minlength)
246 {
247 /*ajFmtPrintF(outf,"%-20s %-8s %-5d %-5d %s\n",
248 ajStrGetPtr(m->seqname),
249 ajStrGetPtr(acc),m->start,
250 m->start+m->len-1,ajStrGetPtr(s));*/
251 if(m->forward)
252 gf = ajFeatNew(ftable, ajUtilGetProgram(), type,
253 m->start,
254 m->start+m->len-1,
255 (float) m->score,
256 '+',0);
257 else
258 gf = ajFeatNew(ftable, ajUtilGetProgram(), type,
259 m->start,
260 m->start+m->len-1,
261 (float) m->score,
262 '-',0);
263 ajFmtPrintS(&tmpstr, "*acc %S", acc);
264 ajFeatTagAddSS(gf, NULL, tmpstr);
265 bf = ajTableFetchS(btable, m->seqname);
266 ajFmtPrintS(&tmpstr, "*factor %S", bf);
267 ajFeatTagAddSS(gf, NULL, tmpstr);
268 }
269
270 embMatMatchDel(&m);
271 }
272
273 ajReportWrite(outf, ftable, seq);
274 ajFeattableDel(&ftable);
275 ajStrDel(&tmpstr);
276 ajStrDel(&type);
277
278 return;
279 }
280