1 /* @source patmatmotifs.c
2 ** @author Copyright (C) Sinead O'Leary (soleary@hgmp.mrc.ac.uk)
3 ** @@
4 ** Application for pattern matching, a sequence against a database of motifs.
5 **
6 ** This program is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU General Public License
8 ** as published by the Free Software Foundation; either version 2
9 ** of the License, or (at your option) any later version.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 ******************************************************************************/
20
21 #include "emboss.h"
22
23
24
25
26 /* @prog patmatmotifs *********************************************************
27 **
28 ** Search a PROSITE motif database with a protein sequence
29 **
30 ******************************************************************************/
31
main(int argc,char ** argv)32 int main(int argc, char **argv)
33 {
34
35 AjPFile inf = NULL;
36 AjPFile inf2 = NULL;
37 AjPFeattable tab = NULL;
38 AjPReport report = NULL;
39
40 AjPSeq sequence = NULL;
41
42 AjPStr redatanew = NULL;
43 AjPStr str = NULL;
44 AjPStr regexp = NULL;
45 AjPStr temp = NULL;
46 AjPStr text = NULL;
47 AjPStr docdata = NULL;
48 AjPStr data = NULL;
49 AjPStr accession = NULL;
50 AjPStr name = NULL;
51 EmbPPatMatch match = NULL;
52 AjPStr savereg = NULL;
53 AjPStr fthit = NULL;
54
55 AjBool full;
56 AjBool prune;
57
58 ajint i;
59 ajint number;
60 ajint start;
61 ajint end;
62 ajint length;
63 ajint zstart;
64 ajint zend;
65 const char *p;
66 ajint seqlength;
67 AjPStr tmpstr = NULL;
68 AjPStr tailstr = NULL;
69 AjPFeature gf;
70
71 embInit("patmatmotifs", argc, argv);
72
73 ajStrAssignC(&fthit, "SO:0001067");
74
75 savereg = ajStrNew();
76 str = ajStrNew();
77 regexp = ajStrNew();
78 temp = ajStrNew();
79 data = ajStrNew();
80 accession = ajStrNew();
81 text = ajStrNew();
82 name = ajStrNew();
83
84 sequence = ajAcdGetSeq("sequence");
85 report = ajAcdGetReport("outfile");
86 full = ajAcdGetBoolean("full");
87 prune = ajAcdGetBoolean("prune");
88
89 ajSeqFmtUpper(sequence); /* prosite regexs are all upper case */
90 tab = ajFeattableNewSeq(sequence);
91 ajStrAssignC(&tailstr, "");
92
93 seqlength = ajStrGetLen(str);
94 str = ajSeqGetSeqCopyS(sequence);
95
96 redatanew = ajStrNewC("PROSITE/prosite.lines");
97 docdata = ajStrNewC("PROSITE/");
98
99 inf = ajDatafileNewInNameS(redatanew);
100 if(!inf)
101 ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running");
102
103 ajFmtPrintAppS(&tmpstr, "Full: %B\n", full);
104 ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune);
105 ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf);
106 ajReportSetHeaderS(report, tmpstr);
107
108 while(ajReadlineTrim(inf, ®exp))
109 {
110 p=ajStrGetPtr(regexp);
111 if(*p && *p!=' ' && *p!='^')
112 {
113 p=ajSysFuncStrtok(p," ");
114 ajStrAssignC(&name,p);
115 if(prune)
116 if(ajStrMatchCaseC(name,"myristyl") ||
117 ajStrMatchCaseC(name,"asn_glycosylation") ||
118 ajStrMatchCaseC(name,"camp_phospho_site") ||
119 ajStrMatchCaseC(name,"pkc_phospho_site") ||
120 ajStrMatchCaseC(name,"ck2_phospho_site") ||
121 ajStrMatchCaseC(name,"tyr_phospho_site"))
122 {
123 for(i=0;i<4;++i)
124 ajReadlineTrim(inf, ®exp);
125 continue;
126 }
127 p=ajSysFuncStrtok(NULL," ");
128 ajStrAssignC(&accession,p);
129 }
130
131 if(ajStrPrefixC(regexp, "^"))
132 {
133 p = ajStrGetPtr(regexp);
134
135 ajStrAssignC(&temp,p+1);
136 ajStrAssignC(&savereg,p+1);
137
138 match = embPatMatchFind(temp, str, ajFalse, ajFalse);
139 number = embPatMatchGetNumber(match);
140
141 for(i=0; i<number; i++)
142 {
143 seqlength = ajStrGetLen(str);
144
145 start = 1+embPatMatchGetStart(match, i);
146
147 end = 1+embPatMatchGetEnd(match, i);
148
149 length = embPatMatchGetLen(match, i);
150
151 gf = ajFeatNew(tab, NULL, fthit, start, end,
152 (float) length, ' ', 0);
153
154 ajFmtPrintS(&tmpstr, "*motif %S", name);
155 ajFeatTagAddSS(gf, NULL, tmpstr);
156
157 if(start-5<0)
158 zstart = 0;
159 else
160 zstart = start-5;
161
162 if(end+5> seqlength)
163 zend = end;
164 else
165 zend = end+5;
166
167
168 ajStrAssignSubS(&temp, str, zstart, zend);
169 }
170
171
172 if(full && number)
173 {
174 ajStrAssignC(&redatanew,ajStrGetPtr(docdata));
175 ajStrAppendC(&redatanew,ajStrGetPtr(accession));
176 inf2 = ajDatafileNewInNameS(redatanew);
177 if(!inf2)
178 continue;
179
180 /*
181 ** Insert Prosite documentation from files made by
182 ** prosextract.c
183 */
184 ajFmtPrintAppS(&tailstr, "Motif: %S\n", name);
185 ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number);
186 while(ajReadlineTrim(inf2, &text))
187 ajFmtPrintAppS(&tailstr, "%S\n", text);
188
189 ajFmtPrintAppS(&tailstr, "\n***************\n\n");
190 ajFileClose(&inf2);
191
192 }
193 embPatMatchDel(&match);
194 }
195 }
196
197 ajReportSetTailS(report,tailstr);
198 ajReportWrite(report, tab, sequence);
199
200 ajReportDel(&report);
201 ajFeattableDel(&tab);
202
203 ajStrDel(&temp);
204 ajStrDel(®exp);
205 ajStrDel(&savereg);
206 ajStrDel(&str);
207 ajStrDel(&data);
208 ajStrDel(&docdata);
209 ajStrDel(&text);
210 ajStrDel(&redatanew);
211 ajStrDel(&accession);
212 ajSeqDel(&sequence);
213 ajStrDel(&tailstr);
214 ajStrDel(&fthit);
215 ajStrDel(&name);
216 ajStrDel(&tmpstr);
217
218 ajFeattableDel(&tab);
219 ajFileClose(&inf);
220
221 embExit();
222
223 return 0;
224 }
225