1 /* @source prosextract.c
2 ** @author Copyright (C) Sinead O'Leary (soleary@hgmp.mrc.ac.uk)
3 **
** Application for extracting relevant lines from the Prosite motif database.
5 **
6 ** This program is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU General Public License
8 ** as published by the Free Software Foundation; either version 2
9 ** of the License, or (at your option) any later version.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 ******************************************************************************/
20
21 #include "emboss.h"
22
23 #define DATANAME "PROSITE/prosite.lines"
24
25
26
27
28 /* @prog prosextract **********************************************************
29 **
30 ** Builds the PROSITE motif database for patmatmotifs to search
31 **
32 ******************************************************************************/
33
main(int argc,char ** argv)34 int main(int argc, char **argv)
35 {
36 AjPFile infdat = NULL;
37 AjPFile infdoc = NULL;
38 AjPFile outf = NULL;
39 AjPFile outs = NULL;
40
41 AjBool haspattern;
42
43 const char *p;
44
45
46 AjPStr line = NULL;
47 AjPStr text = NULL;
48 AjPStr dirname = NULL;
49 AjPStr filename = NULL;
50 AjPStr id = NULL;
51 AjPStr ac = NULL;
52 AjPStr de = NULL;
53 AjPStr pa = NULL;
54 AjPStr ps = NULL;
55 AjPStr fn = NULL;
56 AjPStr re = NULL;
57 AjPStr fname = NULL;
58 AjBool flag;
59 AjBool isopen;
60 AjBool goback;
61
62 ajlong storepos = 0L;
63
64
65 embInit("prosextract", argc, argv);
66
67 dirname = ajAcdGetDirectoryName("prositedir");
68
69 line = ajStrNew();
70 text = ajStrNew();
71
72 id = ajStrNew();
73 ac = ajStrNew();
74 de = ajStrNew();
75 pa = ajStrNew();
76 ps = ajStrNew();
77
78
79
80 fn=ajStrNew();
81 ajStrAssignS(&fn,dirname);
82 ajStrAppendC(&fn,"prosite.dat");
83 if(!(infdat=ajFileNewInNameS(fn)))
84 ajFatal("Cannot open file %S",fn);
85 ajStrDel(&fn);
86
87
88
89 fn=ajStrNewC("PROSITE/prosite.lines");
90 outf = ajDatafileNewOutNameS(fn);
91 ajStrDel(&fn);
92
93
94
95 haspattern = ajFalse;
96
97 while(ajReadlineTrim(infdat, &line) )
98 {
99 if(ajStrPrefixC(line, "ID"))
100 {
101 if(ajStrSuffixC(line,"PATTERN."))
102 {
103 haspattern = ajTrue;
104 /*save id*/
105 p = ajStrGetPtr(line);
106 p = ajSysFuncStrtok(p," \t;");
107 p = ajSysFuncStrtok(NULL," \t;");
108 ajStrAssignC(&id,p);
109 ajFmtPrintF(outf, "%S ", id);
110 continue;
111 }
112 else
113 {
114 haspattern = ajFalse;
115 continue;
116 }
117 }
118
119 if(!haspattern)
120 continue;
121
122
123 if(ajStrPrefixC(line, "AC") )
124 {
125 p = ajStrGetPtr(line);
126 p = ajSysFuncStrtok(p, " \t;");
127 p = ajSysFuncStrtok(NULL, " \t;");
128 ajStrAssignC(&ac,p);
129 ajFmtPrintF(outf, "%S\n ", ac);
130 continue;
131 }
132
133 if(ajStrPrefixC(line, "DE") )
134 {
135 p = ajStrGetPtr(line);
136 p = ajSysFuncStrtok(p, " \t.");
137 p = ajSysFuncStrtok(NULL, " \t.");
138 ajStrAssignC(&de,p);
139 ajFmtPrintF(outf, "%S\n ", de);
140 continue;
141 }
142
143
144 if(ajStrPrefixC(line, "PA"))
145 {
146 ajStrAssignC(&pa,"");
147
148 while(ajStrPrefixC(line,"PA"))
149 {
150 p = ajStrGetPtr(line);
151 p = ajSysFuncStrtok(p, " \t.");
152 p = ajSysFuncStrtok(NULL, " \t.");
153 ajStrAppendC(&pa,p);
154 ajReadlineTrim(infdat, &line);
155 }
156
157 ajFmtPrintF(outf, "%S\n", pa);
158 re = embPatPrositeToRegExp(pa);
159 ajFmtPrintF(outf, "^%S\n\n", re);
160 ajStrDel(&re);
161 continue;
162 }
163 }
164
165
166 /* Finished processing prosite.dat so look at prosite.doc */
167
168
169 fn = ajStrNew();
170 ajStrAssignS(&fn,dirname);
171 ajStrAppendC(&fn,"prosite.doc");
172 if(!(infdoc=ajFileNewInNameS(fn)))
173 ajFatal("Cannot open file %S",fn);
174 ajStrDel(&fn);
175
176
177
178 fname = ajStrNewC("PROSITE/");
179 flag = ajFalse;
180 isopen = ajFalse;
181 goback = ajFalse;
182
183
184 while(ajReadlineTrim(infdoc, &text))
185 {
186 if(ajStrPrefixC(text, "{PS") && isopen && !goback)
187 goback = ajTrue;
188
189 if(ajStrPrefixC(text, "{PS") && !isopen)
190 {
191 storepos = ajFileResetPos(infdoc);
192 /* save out the documentation text to acc numbered outfiles . */
193 p = ajStrGetPtr(text)+1;
194 p = ajSysFuncStrtok(p, ";");
195 ajStrAssignS(&filename, fname);
196 ajStrAppendC(&filename, p);
197
198 outs = ajDatafileNewOutNameS(filename);
199 flag = ajTrue;
200 isopen = ajTrue;
201 continue;
202 }
203
204
205 if(ajStrPrefixC(text, "{BEGIN}") && flag)
206 {
207 while(ajReadlineTrim(infdoc, &text))
208 {
209 if(ajStrPrefixC(text,"{END}"))
210 break;
211
212 ajFmtPrintF(outs, "%S\n", text);
213 }
214 ajFileClose(&outs);
215 isopen = ajFalse;
216
217 if(goback)
218 {
219 goback = ajFalse;
220 ajFileSeek(infdoc,storepos,0);
221 }
222
223 }
224 }
225
226 ajStrDel(&line);
227 ajStrDel(&text);
228 ajStrDel(&dirname);
229 ajStrDel(&filename);
230
231 ajStrDel(&id);
232 ajStrDel(&ac);
233 ajStrDel(&de);
234 ajStrDel(&pa);
235 ajStrDel(&re);
236 ajStrDel(&ps);
237 ajStrDel(&fname);
238
239
240 ajFileClose(&infdat);
241 ajFileClose(&infdoc);
242 ajFileClose(&outf);
243
244 embExit();
245
246 return 0;
247 }
248