1 /* @source tfscan application
2 **
3 ** Finds transcription factors
4 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
5 ** @@
6 **
7 ** 12/03/2000: (AJB) Added accession numbers, end points and matching sequence
8 **
9 ** This program is free software; you can redistribute it and/or
10 ** modify it under the terms of the GNU General Public License
11 ** as published by the Free Software Foundation; either version 2
12 ** of the License, or (at your option) any later version.
13 **
14 ** This program is distributed in the hope that it will be useful,
15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 ** GNU General Public License for more details.
18 **
19 ** You should have received a copy of the GNU General Public License
20 ** along with this program; if not, write to the Free Software
21 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22 ******************************************************************************/
23 
24 #include "emboss.h"
25 
26 
27 
28 
29 static void tfscan_print_hits(AjPList *l, ajint hits,
30 			      AjPReport outf,
31 			      const AjPTable t, const AjPSeq seq,
32 			      ajuint minlength,
33 			      const AjPTable btable);
34 
35 
36 
37 
38 /* @prog tfscan ***************************************************************
39 **
40 ** Scans DNA sequences for transcription factors
41 **
42 ******************************************************************************/
43 
main(int argc,char ** argv)44 int main(int argc, char **argv)
45 {
46     AjPSeqall seqall;
47     AjPSeq seq   = NULL;
48     AjPReport outf = NULL;
49     AjPFile inf  = NULL;
50 
51     ajint begin;
52     ajint end;
53 
54     AjPList l = NULL;
55 
56     AjPStr strand = NULL;
57     AjPStr substr = NULL;
58     AjPStr line   = NULL;
59     AjPStr name   = NULL;
60     AjPStr acc    = NULL;
61     AjPStr bf     = NULL;
62     AjPStr menu;
63     AjPStr pattern  = NULL;
64     AjPStr opattern = NULL;
65     AjPStr pname    = NULL;
66     AjPStr key      = NULL;
67     AjPStr value    = NULL;
68     AjPTable atable = NULL;
69     AjPTable btable = NULL;
70 
71     ajint mismatch;
72     ajint minlength;
73 
74     ajint sum;
75     ajint v;
76 
77     char cp;
78     const char *p;
79 
80 
81     embInit("tfscan", argc, argv);
82 
83     seqall     = ajAcdGetSeqall("sequence");
84     outf       = ajAcdGetReport("outfile");
85     mismatch   = ajAcdGetInt("mismatch");
86     minlength  = ajAcdGetInt("minlength");
87     menu       = ajAcdGetListSingle("menu");
88 
89     pname = ajStrNew();
90     cp=ajStrGetCharFirst(menu);
91 
92     if(cp=='F')
93 	ajStrAssignC(&pname,"tffungi");
94     else if(cp=='I')
95 	ajStrAssignC(&pname,"tfinsect");
96     else if(cp=='O')
97 	ajStrAssignC(&pname,"tfother");
98     else if(cp=='P')
99 	ajStrAssignC(&pname,"tfplant");
100     else if(cp=='V')
101 	ajStrAssignC(&pname,"tfvertebrate");
102     else if(cp=='C')
103 	inf = ajAcdGetDatafile("custom");
104 
105     if(cp!='C')
106     {
107 	inf = ajDatafileNewInNameS(pname);
108 	if(!inf)
109 	    ajFatal("Either EMBOSS_DATA undefined or TFEXTRACT needs running");
110     }
111 
112     name     = ajStrNew();
113     acc      = ajStrNew();
114     bf       = ajStrNewC("");
115     substr   = ajStrNew();
116     line     = ajStrNew();
117     pattern  = ajStrNewC("AA");
118     opattern = ajStrNew();
119 
120     while(ajSeqallNext(seqall, &seq))
121     {
122 	begin=ajSeqallGetseqBegin(seqall);
123 	end=ajSeqallGetseqEnd(seqall);
124 	ajStrAssignC(&name,ajSeqGetNameC(seq));
125 	strand=ajSeqGetSeqCopyS(seq);
126 
127 	ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1);
128 	ajStrFmtUpper(&substr);
129 
130 	l=ajListNew();
131 	atable = ajTablestrNew(1000);
132 	btable = ajTablestrNew(1000);
133 
134 	sum=0;
135 	while(ajReadlineTrim(inf,&line))
136 	{
137 	    p = ajStrGetPtr(line);
138 
139 	    if(!*p || *p=='#' || *p=='\n' || *p=='!')
140 		continue;
141 
142 	    ajFmtScanS(line,"%S%S%S",&pname,&pattern,&acc);
143 	    p += ajStrGetLen(pname);
144 	    while(*p && *p==' ')
145 		++p;
146 	    p += ajStrGetLen(pattern);
147 	    while(*p && *p==' ')
148 		++p;
149 	    p += ajStrGetLen(acc);
150 	    while(*p && *p==' ')
151 		++p;
152 
153 	    ajStrAssignS(&opattern,pattern);
154 	    ajStrAssignC(&bf,p); /* rest of line */
155 
156 	    v = embPatVariablePattern(pattern,substr,pname,l,0,
157 				      mismatch,begin);
158 	    if(v)
159 	    {
160 		key = ajStrNewS(pname);
161 		value = ajStrNewS(acc);
162 		ajTablePut(atable,(void *)key,(void *)value);
163 		key = ajStrNewS(pname);
164 		value = ajStrNewS(bf);
165 		ajTablePut(btable,(void *)key,(void *)value);
166 	    }
167 	    sum += v;
168 	}
169 
170 	if(sum)
171 	    tfscan_print_hits(&l,sum,outf,atable,seq,minlength,
172 			      btable);
173 
174 	ajFileSeek(inf,0L,0);
175 	ajListFree(&l);
176 	ajTablestrFree(&atable);
177 	ajTablestrFree(&btable);
178 	ajStrDel(&strand);
179     }
180 
181     ajStrDel(&line);
182     ajStrDel(&name);
183     ajStrDel(&acc);
184     ajStrDel(&pname);
185     ajStrDel(&opattern);
186     ajStrDel(&bf);
187     ajStrDel(&pattern);
188     ajStrDel(&substr);
189     ajSeqDel(&seq);
190     ajFileClose(&inf);
191     ajReportClose(outf);
192     ajReportDel(&outf);
193 
194     ajSeqallDel(&seqall);
195     ajSeqDel(&seq);
196     ajStrDel(&menu);
197 
198     embExit();
199 
200     return 0;
201 }
202 
203 
204 
205 
206 /* @funcstatic tfscan_print_hits **********************************************
207 **
208 ** Print matches to transcription factor sites
209 **
210 ** @param [w] l [AjPList*] list of hits
211 ** @param [r] hits [ajint] number of hits
212 ** @param [u] outf [AjPReport] output report
213 ** @param [r] t [const AjPTable] table of accession numbers
214 ** @param [r] seq [const AjPSeq] test sequence
215 ** @param [r] minlength [ajuint] minimum length of pattern
216 ** @param [r] btable [const AjPTable] BF lines from transfac (if any)
217 ** @@
218 ******************************************************************************/
219 
tfscan_print_hits(AjPList * l,ajint hits,AjPReport outf,const AjPTable t,const AjPSeq seq,ajuint minlength,const AjPTable btable)220 static void tfscan_print_hits(AjPList *l,
221 			      ajint hits, AjPReport outf,
222 			      const AjPTable t,
223 			      const AjPSeq seq, ajuint minlength,
224 			      const  AjPTable btable)
225 {
226     ajint i;
227     EmbPMatMatch m;
228     const AjPStr acc = NULL;
229     const AjPStr bf  = NULL;
230     AjPFeattable ftable = NULL;
231     AjPFeature gf  = NULL;
232     AjPStr type = ajStrNewC("SO:0000235");
233     AjPStr tmpstr = NULL;
234 
235     ftable = ajFeattableNewSeq(seq);
236 
237     /*ajFmtPrintF(outf,"TFSCAN of %s from %d to %d\n\n",ajStrGetPtr(name),
238       begin,end);*/
239 
240     for(i=0;i<hits;++i)
241     {
242 	ajListPop(*l,(void **)&m);
243 	acc = ajTableFetchS(t, m->seqname);
244 
245 	if(m->len >= minlength)
246         {
247 	    /*ajFmtPrintF(outf,"%-20s %-8s %-5d %-5d %s\n",
248                         ajStrGetPtr(m->seqname),
249 			ajStrGetPtr(acc),m->start,
250 			m->start+m->len-1,ajStrGetPtr(s));*/
251             if(m->forward)
252                 gf = ajFeatNew(ftable, ajUtilGetProgram(), type,
253                                m->start,
254                                m->start+m->len-1,
255                                (float) m->score,
256                                '+',0);
257             else
258                 gf = ajFeatNew(ftable, ajUtilGetProgram(), type,
259                                m->start,
260                                m->start+m->len-1,
261                                (float) m->score,
262                                '-',0);
263             ajFmtPrintS(&tmpstr, "*acc %S", acc);
264             ajFeatTagAddSS(gf, NULL, tmpstr);
265             bf  = ajTableFetchS(btable, m->seqname);
266             ajFmtPrintS(&tmpstr, "*factor %S", bf);
267             ajFeatTagAddSS(gf, NULL, tmpstr);
268         }
269 
270 	embMatMatchDel(&m);
271     }
272 
273     ajReportWrite(outf, ftable, seq);
274     ajFeattableDel(&ftable);
275     ajStrDel(&tmpstr);
276     ajStrDel(&type);
277 
278     return;
279 }
280