1 /* @source patmatmotifs.c
2 ** @author Copyright (C) Sinead O'Leary (soleary@hgmp.mrc.ac.uk)
3 ** @@
4 ** Application for pattern matching, a sequence against a database of motifs.
5 **
6 ** This program is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU General Public License
8 ** as published by the Free Software Foundation; either version 2
9 ** of the License, or (at your option) any later version.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19 ******************************************************************************/
20 
21 #include "emboss.h"
22 
23 
24 
25 
26 /* @prog patmatmotifs *********************************************************
27 **
28 ** Search a PROSITE motif database with a protein sequence
29 **
30 ******************************************************************************/
31 
main(int argc,char ** argv)32 int main(int argc, char **argv)
33 {
34 
35     AjPFile inf	 = NULL;
36     AjPFile inf2 = NULL;
37     AjPFeattable tab = NULL;
38     AjPReport report = NULL;
39 
40     AjPSeq sequence = NULL;
41 
42     AjPStr redatanew = NULL;
43     AjPStr str	     = NULL;
44     AjPStr regexp    = NULL;
45     AjPStr temp	     = NULL;
46     AjPStr text      = NULL;
47     AjPStr docdata   = NULL;
48     AjPStr data      = NULL;
49     AjPStr accession = NULL;
50     AjPStr name      = NULL;
51     EmbPPatMatch match = NULL;
52     AjPStr savereg = NULL;
53     AjPStr fthit   = NULL;
54 
55     AjBool full;
56     AjBool prune;
57 
58     ajint i;
59     ajint number;
60     ajint start;
61     ajint end;
62     ajint length;
63     ajint zstart;
64     ajint zend;
65     const char *p;
66     ajint seqlength;
67     AjPStr tmpstr  = NULL;
68     AjPStr tailstr = NULL;
69     AjPFeature gf;
70 
71     embInit("patmatmotifs", argc, argv);
72 
73     ajStrAssignC(&fthit, "SO:0001067");
74 
75     savereg   = ajStrNew();
76     str       = ajStrNew();
77     regexp    = ajStrNew();
78     temp      = ajStrNew();
79     data      = ajStrNew();
80     accession = ajStrNew();
81     text      = ajStrNew();
82     name      = ajStrNew();
83 
84     sequence = ajAcdGetSeq("sequence");
85     report   = ajAcdGetReport("outfile");
86     full     = ajAcdGetBoolean("full");
87     prune    = ajAcdGetBoolean("prune");
88 
89     ajSeqFmtUpper(sequence);		/* prosite regexs are all upper case */
90     tab = ajFeattableNewSeq(sequence);
91     ajStrAssignC(&tailstr, "");
92 
93     seqlength = ajStrGetLen(str);
94     str       = ajSeqGetSeqCopyS(sequence);
95 
96     redatanew = ajStrNewC("PROSITE/prosite.lines");
97     docdata   = ajStrNewC("PROSITE/");
98 
99     inf = ajDatafileNewInNameS(redatanew);
100     if(!inf)
101 	ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running");
102 
103     ajFmtPrintAppS(&tmpstr, "Full: %B\n", full);
104     ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune);
105     ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf);
106     ajReportSetHeaderS(report, tmpstr);
107 
108     while(ajReadlineTrim(inf, &regexp))
109     {
110 	p=ajStrGetPtr(regexp);
111 	if(*p && *p!=' ' && *p!='^')
112 	{
113 	    p=ajSysFuncStrtok(p," ");
114 	    ajStrAssignC(&name,p);
115 	    if(prune)
116 		if(ajStrMatchCaseC(name,"myristyl") ||
117 		   ajStrMatchCaseC(name,"asn_glycosylation") ||
118 		   ajStrMatchCaseC(name,"camp_phospho_site") ||
119 		   ajStrMatchCaseC(name,"pkc_phospho_site") ||
120 		   ajStrMatchCaseC(name,"ck2_phospho_site") ||
121 		   ajStrMatchCaseC(name,"tyr_phospho_site"))
122 		{
123 		    for(i=0;i<4;++i)
124 			ajReadlineTrim(inf, &regexp);
125 		    continue;
126 		}
127 	    p=ajSysFuncStrtok(NULL," ");
128 	    ajStrAssignC(&accession,p);
129 	}
130 
131 	if(ajStrPrefixC(regexp, "^"))
132 	{
133 	    p = ajStrGetPtr(regexp);
134 
135 	    ajStrAssignC(&temp,p+1);
136 	    ajStrAssignC(&savereg,p+1);
137 
138 	    match = embPatMatchFind(temp, str, ajFalse, ajFalse);
139 	    number = embPatMatchGetNumber(match);
140 
141 	    for(i=0; i<number; i++)
142 	    {
143 		seqlength = ajStrGetLen(str);
144 
145 		start = 1+embPatMatchGetStart(match, i);
146 
147 		end = 1+embPatMatchGetEnd(match, i);
148 
149 		length = embPatMatchGetLen(match, i);
150 
151 		gf = ajFeatNew(tab, NULL, fthit, start, end,
152 			       (float) length, ' ', 0);
153 
154 		ajFmtPrintS(&tmpstr, "*motif %S", name);
155 		ajFeatTagAddSS(gf, NULL, tmpstr);
156 
157 		if(start-5<0)
158 		    zstart = 0;
159 		else
160 		    zstart = start-5;
161 
162 		if(end+5> seqlength)
163 		    zend = end;
164 		else
165 		    zend = end+5;
166 
167 
168 		ajStrAssignSubS(&temp, str, zstart, zend);
169 	    }
170 
171 
172 	    if(full && number)
173 	    {
174 		ajStrAssignC(&redatanew,ajStrGetPtr(docdata));
175 		ajStrAppendC(&redatanew,ajStrGetPtr(accession));
176 		inf2 = ajDatafileNewInNameS(redatanew);
177 		if(!inf2)
178 		    continue;
179 
180 		/*
181 		** Insert Prosite documentation from files made by
182 		** prosextract.c
183 		*/
184 		ajFmtPrintAppS(&tailstr, "Motif: %S\n", name);
185 		ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number);
186 		while(ajReadlineTrim(inf2, &text))
187 		    ajFmtPrintAppS(&tailstr, "%S\n", text);
188 
189 		ajFmtPrintAppS(&tailstr, "\n***************\n\n");
190 		ajFileClose(&inf2);
191 
192 	    }
193 	    embPatMatchDel(&match);
194 	}
195     }
196 
197     ajReportSetTailS(report,tailstr);
198     ajReportWrite(report, tab, sequence);
199 
200     ajReportDel(&report);
201     ajFeattableDel(&tab);
202 
203     ajStrDel(&temp);
204     ajStrDel(&regexp);
205     ajStrDel(&savereg);
206     ajStrDel(&str);
207     ajStrDel(&data);
208     ajStrDel(&docdata);
209     ajStrDel(&text);
210     ajStrDel(&redatanew);
211     ajStrDel(&accession);
212     ajSeqDel(&sequence);
213     ajStrDel(&tailstr);
214     ajStrDel(&fthit);
215     ajStrDel(&name);
216     ajStrDel(&tmpstr);
217 
218     ajFeattableDel(&tab);
219     ajFileClose(&inf);
220 
221     embExit();
222 
223     return 0;
224 }
225