1 /* @source prosextract.c
2 ** @author Copyright (C) Sinead O'Leary (soleary@hgmp.mrc.ac.uk)
3 **
** Application for extracting relevant lines from the Prosite motif database.
5 **
6 ** This program is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU General Public License
8 ** as published by the Free Software Foundation; either version 2
9 ** of the License, or (at your option) any later version.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19 ******************************************************************************/
20 
21 #include "emboss.h"
22 
23 #define DATANAME "PROSITE/prosite.lines"
24 
25 
26 
27 
28 /* @prog prosextract **********************************************************
29 **
30 ** Builds the PROSITE motif database for patmatmotifs to search
31 **
32 ******************************************************************************/
33 
main(int argc,char ** argv)34 int main(int argc, char **argv)
35 {
36     AjPFile infdat = NULL;
37     AjPFile infdoc = NULL;
38     AjPFile outf   = NULL;
39     AjPFile outs   = NULL;
40 
41     AjBool  haspattern;
42 
43     const char   *p;
44 
45 
46     AjPStr line  = NULL;
47     AjPStr text  = NULL;
48     AjPStr dirname  = NULL;
49     AjPStr filename = NULL;
50     AjPStr id    = NULL;
51     AjPStr ac    = NULL;
52     AjPStr de    = NULL;
53     AjPStr pa    = NULL;
54     AjPStr ps    = NULL;
55     AjPStr fn    = NULL;
56     AjPStr re    = NULL;
57     AjPStr fname = NULL;
58     AjBool flag;
59     AjBool isopen;
60     AjBool goback;
61 
62     ajlong storepos = 0L;
63 
64 
65     embInit("prosextract", argc, argv);
66 
67     dirname = ajAcdGetDirectoryName("prositedir");
68 
69     line = ajStrNew();
70     text = ajStrNew();
71 
72     id = ajStrNew();
73     ac = ajStrNew();
74     de = ajStrNew();
75     pa = ajStrNew();
76     ps = ajStrNew();
77 
78 
79 
80     fn=ajStrNew();
81     ajStrAssignS(&fn,dirname);
82     ajStrAppendC(&fn,"prosite.dat");
83     if(!(infdat=ajFileNewInNameS(fn)))
84 	ajFatal("Cannot open file %S",fn);
85     ajStrDel(&fn);
86 
87 
88 
89     fn=ajStrNewC("PROSITE/prosite.lines");
90     outf = ajDatafileNewOutNameS(fn);
91     ajStrDel(&fn);
92 
93 
94 
95     haspattern = ajFalse;
96 
97     while(ajReadlineTrim(infdat, &line) )
98     {
99 	if(ajStrPrefixC(line, "ID"))
100 	{
101 	    if(ajStrSuffixC(line,"PATTERN."))
102 	    {
103 		haspattern = ajTrue;
104 		/*save id*/
105 		p = ajStrGetPtr(line);
106 		p = ajSysFuncStrtok(p," \t;");
107 		p = ajSysFuncStrtok(NULL," \t;");
108 		ajStrAssignC(&id,p);
109 		ajFmtPrintF(outf, "%S ", id);
110 		continue;
111 	    }
112 	    else
113 	    {
114 		haspattern = ajFalse;
115 		continue;
116 	    }
117 	}
118 
119 	if(!haspattern)
120 	    continue;
121 
122 
123 	if(ajStrPrefixC(line, "AC") )
124 	{
125 	    p = ajStrGetPtr(line);
126 	    p = ajSysFuncStrtok(p, " \t;");
127 	    p = ajSysFuncStrtok(NULL, " \t;");
128 	    ajStrAssignC(&ac,p);
129 	    ajFmtPrintF(outf, "%S\n ", ac);
130 	    continue;
131 	}
132 
133     	if(ajStrPrefixC(line, "DE") )
134 	{
135 	    p = ajStrGetPtr(line);
136 	    p = ajSysFuncStrtok(p, " \t.");
137 	    p = ajSysFuncStrtok(NULL, " \t.");
138 	    ajStrAssignC(&de,p);
139 	    ajFmtPrintF(outf, "%S\n ", de);
140 	    continue;
141 	}
142 
143 
144 	if(ajStrPrefixC(line, "PA"))
145 	{
146 	    ajStrAssignC(&pa,"");
147 
148 	    while(ajStrPrefixC(line,"PA"))
149 	    {
150 		p = ajStrGetPtr(line);
151 		p = ajSysFuncStrtok(p, " \t.");
152 		p = ajSysFuncStrtok(NULL, " \t.");
153 		ajStrAppendC(&pa,p);
154 		ajReadlineTrim(infdat, &line);
155 	    }
156 
157 	    ajFmtPrintF(outf, "%S\n", pa);
158 	    re = embPatPrositeToRegExp(pa);
159 	    ajFmtPrintF(outf, "^%S\n\n", re);
160 	    ajStrDel(&re);
161 	    continue;
162 	}
163     }
164 
165 
166   /* Finished processing prosite.dat so look at prosite.doc */
167 
168 
169     fn = ajStrNew();
170     ajStrAssignS(&fn,dirname);
171     ajStrAppendC(&fn,"prosite.doc");
172     if(!(infdoc=ajFileNewInNameS(fn)))
173 	ajFatal("Cannot open file %S",fn);
174     ajStrDel(&fn);
175 
176 
177 
178     fname  = ajStrNewC("PROSITE/");
179     flag   = ajFalse;
180     isopen = ajFalse;
181     goback = ajFalse;
182 
183 
184     while(ajReadlineTrim(infdoc, &text))
185     {
186 	if(ajStrPrefixC(text, "{PS") && isopen && !goback)
187 	    goback = ajTrue;
188 
189 	if(ajStrPrefixC(text, "{PS") && !isopen)
190 	{
191 	    storepos = ajFileResetPos(infdoc);
192 	    /* save out the documentation text to acc numbered outfiles . */
193 	    p = ajStrGetPtr(text)+1;
194 	    p = ajSysFuncStrtok(p, ";");
195 	    ajStrAssignS(&filename, fname);
196 	    ajStrAppendC(&filename, p);
197 
198 	    outs = ajDatafileNewOutNameS(filename);
199 	    flag   = ajTrue;
200 	    isopen = ajTrue;
201 	    continue;
202 	}
203 
204 
205 	if(ajStrPrefixC(text, "{BEGIN}") && flag)
206 	{
207 	    while(ajReadlineTrim(infdoc, &text))
208 	    {
209 		if(ajStrPrefixC(text,"{END}"))
210 		    break;
211 
212 		ajFmtPrintF(outs, "%S\n", text);
213 	    }
214 	    ajFileClose(&outs);
215 	    isopen = ajFalse;
216 
217 	    if(goback)
218 	    {
219 		goback = ajFalse;
220 		ajFileSeek(infdoc,storepos,0);
221 	    }
222 
223 	}
224     }
225 
226     ajStrDel(&line);
227     ajStrDel(&text);
228     ajStrDel(&dirname);
229     ajStrDel(&filename);
230 
231     ajStrDel(&id);
232     ajStrDel(&ac);
233     ajStrDel(&de);
234     ajStrDel(&pa);
235     ajStrDel(&re);
236     ajStrDel(&ps);
237     ajStrDel(&fname);
238 
239 
240     ajFileClose(&infdat);
241     ajFileClose(&infdoc);
242     ajFileClose(&outf);
243 
244     embExit();
245 
246     return 0;
247 }
248