1 /* @include ajpat *************************************************************
2 **
3 ** AJAX PATTERN (ajax pattern and patternlist) functions
4 **
5 ** These functions allow handling of patternlists.
6 **
7 ** @author Copyright (C) 2004 Henrikki Almusa, Medicel Oy, Finland
8 ** @version $Revision: 1.12 $
9 ** @modified Aug 10 Beta version
10 ** @modified 2004-2011 Peter Rice
11 ** @modified $Date: 2011/10/18 14:23:40 $ by $Author: rice $
12 ** @@
13 **
14 ** This library is free software; you can redistribute it and/or
15 ** modify it under the terms of the GNU Lesser General Public
16 ** License as published by the Free Software Foundation; either
17 ** version 2.1 of the License, or (at your option) any later version.
18 **
19 ** This library is distributed in the hope that it will be useful,
20 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 ** Lesser General Public License for more details.
23 **
24 ** You should have received a copy of the GNU Lesser General Public
25 ** License along with this library; if not, write to the Free Software
26 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
27 ** MA  02110-1301,  USA.
28 **
29 ******************************************************************************/
30 
31 #ifndef AJPAT_H
32 #define AJPAT_H
33 
34 /* ========================================================================= */
35 /* ============================= include files ============================= */
36 /* ========================================================================= */
37 
38 #include "ajdefine.h"
39 #include "ajstr.h"
40 #include "ajlist.h"
41 #include "ajreg.h"
42 
43 AJ_BEGIN_DECLS
44 
45 
46 
47 
48 /* ========================================================================= */
49 /* =============================== constants =============================== */
50 /* ========================================================================= */
51 
52 
53 
54 
/*
** Constants shared by the string search algorithms.
**
** The replacement text of each macro is unchanged; only the layout and
** the notes differ.
*/
#define AJALPHA   256   /* Alphabet (also the size of AjOPatComp.off)   */
#define AJMOD256  0xff  /* NOTE(review): presumably a modulo-256 mask   */
#define AJALPHA2  128   /* ASCII printable                              */
#define AJWORD    32    /* Size of a word                               */
#define AJBPS     1     /* Bits per state                               */
63 
64 
65 
66 
67 /* ========================================================================= */
68 /* ============================== public data ============================== */
69 /* ========================================================================= */
70 
71 
72 
73 
74 /* @data AjPPatBYPNode ********************************************************
75 **
76 ** AJAX data structure for nodes in Baeza-Yates & Perleberg algorithm
77 **
78 ** @attr next [struct AjSPatBYPNode*] Pointer to next node
79 ** @attr offset [ajint] Offset
80 ** @attr Padding [char[4]] Padding to alignment boundary
81 ** @@
82 ******************************************************************************/
83 
84 typedef struct AjSPatBYPNode
85 {
86     struct AjSPatBYPNode *next;
87     ajint offset;
88     char  Padding[4];
89 } AjOPatBYPNode;
90 
91 #define AjPPatBYPNode AjOPatBYPNode*
92 
93 
94 
95 
96 /* @data AjPPatComp ***********************************************************
97 **
** AJAX data structure that holds all needed data for compiling and
** searching, not including the mismatch number.
100 **
101 ** @alias AjSPatComp
102 ** @alias AjOPatComp
103 **
104 ** @attr pattern [AjPStr] Prosite pattern string
105 ** @attr type [ajuint] Prosite pattern compile type
106 ** @attr plen [ajuint] Prosite pattern length
107 ** @attr buf [ajint*] Buffer for BMH search (can be -1)
108 ** @attr off [AjOPatBYPNode[AJALPHA]] Offset buffer for B-Y/P search
109 ** @attr sotable [ajuint*] Buffer for SHIFT-OR
110 ** @attr solimit [ajuint] Limit for BMH search
111 ** @attr m [ajuint] Real length of pattern (from embPatGetType)
112 ** @attr regex [AjPStr] PCRE regexp string
113 ** @attr skipm [ajuint**] Skip buffer for Tarhio-Ukkonen
114 ** @attr amino [AjBool] Must match left begin
115 ** @attr carboxyl [AjBool] Must match right
116 **
117 ** @@
118 ******************************************************************************/
119 
120 typedef struct AjSPatComp
121 {
122     AjPStr pattern;
123     ajuint type;
124     ajuint plen;
125     ajint* buf;
126     AjOPatBYPNode off[AJALPHA];
127     ajuint* sotable;
128     ajuint solimit;
129     ajuint m;
130     AjPStr regex;
131     ajuint** skipm;
132     AjBool amino;
133     AjBool carboxyl;
134 } AjOPatComp;
135 
136 #define AjPPatComp AjOPatComp*
137 
138 
139 
140 
141 /* @data AjPPatternSeq ********************************************************
142 **
143 ** Ajax sequence pattern object.
144 **
145 ** Holds definition of feature pattern. Regular expression patterns ignore
146 ** mismatch value.
147 **
148 ** @alias AjSPatternSeq
149 ** @alias AjOPatternSeq
150 **
151 ** @attr Name [AjPStr] Name.
152 ** @attr Pattern [AjPStr] Pattern in string format.
153 ** @attr Compiled [void*] Compiled version of the pattern.
154 ** @attr Protein [AjBool] True if protein pattern
155 ** @attr Mismatch [ajuint] Mismatch value.
156 **
** @new ajPatternSeqNewList Constructor taking list, name, pattern and mismatch
** @delete ajPatternSeqDel Default destructor
** @use ajPatternSeqGetName Returns name.
** @use ajPatternSeqGetPattern Returns pattern in string format.
** @use ajPatternSeqGetCompiled Returns pointer to compiled pattern.
** @use ajPatternSeqGetProtein Returns true if protein pattern.
** @use ajPatternSeqGetMismatch Returns mismatch value.
164 ** @@
165 ******************************************************************************/
166 
167 typedef struct AjSPatternSeq
168 {
169     AjPStr Name;
170     AjPStr Pattern;
171     void* Compiled;
172     AjBool Protein;
173     ajuint Mismatch;
174 } AjOPatternSeq;
175 
176 #define AjPPatternSeq AjOPatternSeq*
177 
178 
179 
180 
181 /* @data AjPPatternRegex ******************************************************
182 **
183 ** Ajax regular expression pattern object.
184 **
185 ** Holds definition of feature pattern. Regular expression patterns ignore
186 ** mismatch value.
187 **
188 ** @alias AjSPatternRegex
189 ** @alias AjOPatternRegex
190 **
191 ** @attr Name [AjPStr] Name.
192 ** @attr Pattern [AjPStr] Pattern in string format.
193 ** @attr Compiled [AjPRegexp] Compiled version of the pattern.
194 ** @attr Type [ajuint] Type.
195 ** @attr Padding [char[4]] Padding to alignment boundary
196 **
** @new ajPatternRegexNewList Constructor taking list, name and pattern
198 ** @delete ajPatternRegexDel Default destructor
199 ** @use ajPatternRegexGetName Returns name.
200 ** @use ajPatternRegexGetPattern Returns pattern in string format.
201 ** @use ajPatternRegexGetCompiled Returns pointer to compiled pattern.
202 ** @use ajPatternRegexGetType Returns type as integer value.
203 ** @@
204 ******************************************************************************/
205 
206 typedef struct AjSPatternRegex
207 {
208     AjPStr    Name;
209     AjPStr    Pattern;
210     AjPRegexp Compiled;
211     ajuint    Type;
212     char      Padding[4];
213 } AjOPatternRegex;
214 
215 #define AjPPatternRegex AjOPatternRegex*
216 
217 
218 
219 
/*
** Pattern type codes:
**   0: string
**   1: prosite (protein)
**   2: prosite-like (nucleotide)
*/
#define AJ_PAT_TYPE_STRING  0
#define AJ_PAT_TYPE_PRO     1
#define AJ_PAT_TYPE_NUCL    2
226 
227 
228 
229 
230 /* @data AjPPatlistSeq ********************************************************
231 **
232 ** Ajax Pattern List object.
233 **
234 ** Holds list of feature patterns and general information of them.
235 **
236 ** @alias AjSPatlistSeq
237 ** @alias AjOPatlistSeq
238 **
239 ** @attr Patlist [AjPList] List for patterns.
240 ** @attr Iter [AjIList] List iterator.
241 ** @attr Protein [AjBool] True if protein
242 ** @attr Padding [char[4]] Padding to alignment boundary
243 **
** @new ajPatlistSeqNew Default constructor.
** @delete ajPatlistSeqDel Default destructor.
** @modify ajPatlistRegexRead Reads the pattern file and fills the list.
** @modify ajPatlistSeqRead Reads the pattern file and fills the list.
** @modify ajPatlistSeqRewind Restarts the iteration loop.
** @modify ajPatlistAddSeq Adds new pattern into list.
** @use ajPatlistSeqGetNext Gets the next pattern from file and returns true
**      if available and false if not.
252 ** @@
253 ******************************************************************************/
254 
255 typedef struct AjSPatlistSeq
256 {
257     AjPList Patlist;
258     AjIList Iter;
259     AjBool Protein;
260     char   Padding[4];
261 } AjOPatlistSeq;
262 
263 #define AjPPatlistSeq AjOPatlistSeq*
264 
265 
266 
267 
268 /* @data AjPPatlistRegex ******************************************************
269 **
270 ** Ajax Pattern List object.
271 **
272 ** Holds list of feature patterns and general information of them.
273 **
274 ** @alias AjSPatlistRegex
275 ** @alias AjOPatlistRegex
276 **
277 ** @attr Patlist [AjPList] List for patterns.
278 ** @attr Iter [AjIList] List iterator.
279 ** @attr Type [ajuint] Type of expression
280 ** @attr Padding [char[4]] Padding to alignment boundary
281 **
** @new ajPatlistRegexNew Default constructor.
** @delete ajPatlistRegexDel Default destructor.
** @modify ajPatlistRegexRead Reads the pattern file and fills the list.
** @modify ajPatlistSeqRead Reads the pattern file and fills the list.
** @modify ajPatlistRegexRewind Restarts the iteration loop.
** @modify ajPatlistAddRegex Adds new pattern into list.
** @use ajPatlistRegexGetNext Gets the next pattern from file and returns true
**      if available and false if not.
290 ** @@
291 ******************************************************************************/
292 
293 typedef struct AjSPatlistRegex
294 {
295     AjPList Patlist;
296     AjIList Iter;
297     ajuint Type;
298     char Padding[4];
299 } AjOPatlistRegex;
300 
301 #define AjPPatlistRegex AjOPatlistRegex*
302 
303 
304 
305 
306 /* ========================================================================= */
307 /* =========================== public functions ============================ */
308 /* ========================================================================= */
309 
310 
311 
312 
313 /*
314 ** Prototype definitions
315 */
316 
317 AjPPatternSeq ajPatternSeqNewList(AjPPatlistSeq plist, const AjPStr name,
318                                   const AjPStr pat, ajuint mismatch);
319 void ajPatternSeqDel(AjPPatternSeq* pthys);
320 const AjPStr ajPatternSeqGetName(const AjPPatternSeq thys);
321 const AjPStr ajPatternSeqGetPattern(const AjPPatternSeq thys);
322 AjPPatComp ajPatternSeqGetCompiled(const AjPPatternSeq thys);
323 AjBool ajPatternSeqGetProtein(const AjPPatternSeq thys);
324 ajuint ajPatternSeqGetMismatch(const AjPPatternSeq thys);
325 void ajPatternSeqSetCompiled(AjPPatternSeq thys, void* pat);
326 void ajPatternSeqDebug(const AjPPatternSeq pat);
327 
328 AjPPatternRegex ajPatternRegexNewList(AjPPatlistRegex plist,
329                                       const AjPStr name,
330                                       const AjPStr pat);
331 void ajPatternRegexDel(AjPPatternRegex* pthys);
332 const AjPStr ajPatternRegexGetName(const AjPPatternRegex thys);
333 const AjPStr ajPatternRegexGetPattern(const AjPPatternRegex thys);
334 AjPRegexp ajPatternRegexGetCompiled(const AjPPatternRegex thys);
335 ajuint ajPatternRegexGetType(const AjPPatternRegex thys);
336 void ajPatternRegexSetCompiled(AjPPatternRegex thys, AjPRegexp pat);
337 void ajPatternRegexDebug(const AjPPatternRegex pat);
338 
339 /* Patlist handling functions */
340 AjPPatlistSeq ajPatlistSeqNewType(AjBool Protein);
341 AjPPatlistRegex ajPatlistRegexNewType(ajuint type);
342 AjPPatlistRegex ajPatlistRegexNew(void);
343 AjPPatlistSeq ajPatlistSeqNew(void);
344 void ajPatlistSeqDel(AjPPatlistSeq* pthys);
345 void ajPatlistRegexDel(AjPPatlistRegex* pthys);
346 AjPPatlistRegex ajPatlistRegexRead(const AjPStr patspec,
347                                    const AjPStr patname,
348                                    const AjPStr fmt,
349                                    ajuint type, AjBool upper, AjBool lower);
350 AjPPatlistSeq ajPatlistSeqRead(const AjPStr patspec,
351                                const AjPStr patname,
352                                const AjPStr fmt,
353                                AjBool protein, ajuint mismatches);
354 AjBool ajPatlistRegexGetNext(AjPPatlistRegex thys,
355                              AjPPatternRegex* pattern);
356 AjBool ajPatlistSeqGetNext(AjPPatlistSeq thys,
357                            AjPPatternSeq* pattern);
358 void ajPatlistSeqRewind(AjPPatlistSeq thys);
359 void ajPatlistRegexRewind(AjPPatlistRegex thys);
360 void ajPatlistSeqRemoveCurrent(AjPPatlistSeq thys);
361 void ajPatlistRegexRemoveCurrent(AjPPatlistRegex thys);
362 void ajPatlistAddRegex(AjPPatlistRegex thys, AjPPatternRegex pat);
363 void ajPatlistAddSeq(AjPPatlistSeq thys, AjPPatternSeq pat);
364 ajuint ajPatlistSeqGetSize(const AjPPatlistSeq plist);
365 ajuint ajPatlistRegexGetSize(const AjPPatlistRegex plist);
366 ajuint ajPatlistRegexDoc(AjPPatlistRegex thys, AjPStr* pdoc);
367 ajuint ajPatlistSeqDoc(AjPPatlistSeq thys, AjPStr* pdoc);
368 
369 AjPPatComp      ajPatCompNew(void);
370 void            ajPatCompDel(AjPPatComp* pthys);
371 ajuint ajPatternRegexType(const AjPStr type);
372 
373 /*
374 ** End of prototype definitions
375 */
376 
377 
378 
379 
380 AJ_END_DECLS
381 
382 #endif /* !AJPAT_H */
383