1 /* @include ajpat ************************************************************* 2 ** 3 ** AJAX PATTERN (ajax pattern and patternlist) functions 4 ** 5 ** These functions allow handling of patternlists. 6 ** 7 ** @author Copyright (C) 2004 Henrikki Almusa, Medicel Oy, Finland 8 ** @version $Revision: 1.12 $ 9 ** @modified Aug 10 Beta version 10 ** @modified 2004-2011 Peter Rice 11 ** @modified $Date: 2011/10/18 14:23:40 $ by $Author: rice $ 12 ** @@ 13 ** 14 ** This library is free software; you can redistribute it and/or 15 ** modify it under the terms of the GNU Lesser General Public 16 ** License as published by the Free Software Foundation; either 17 ** version 2.1 of the License, or (at your option) any later version. 18 ** 19 ** This library is distributed in the hope that it will be useful, 20 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 21 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 ** Lesser General Public License for more details. 23 ** 24 ** You should have received a copy of the GNU Lesser General Public 25 ** License along with this library; if not, write to the Free Software 26 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 27 ** MA 02110-1301, USA. 28 ** 29 ******************************************************************************/ 30 31 #ifndef AJPAT_H 32 #define AJPAT_H 33 34 /* ========================================================================= */ 35 /* ============================= include files ============================= */ 36 /* ========================================================================= */ 37 38 #include "ajdefine.h" 39 #include "ajstr.h" 40 #include "ajlist.h" 41 #include "ajreg.h" 42 43 AJ_BEGIN_DECLS 44 45 46 47 48 /* ========================================================================= */ 49 /* =============================== constants =============================== */ 50 /* ========================================================================= */ 51 52 53 54 55 /* 56 ** Defines for string search algorithms 57 */ 58 #define AJALPHA 256 /* Alphabet */ 59 #define AJMOD256 0xff 60 #define AJALPHA2 128 /* ASCII printable */ 61 #define AJWORD 32 /* Size of a word */ 62 #define AJBPS 1 /* Bits per state */ 63 64 65 66 67 /* ========================================================================= */ 68 /* ============================== public data ============================== */ 69 /* ========================================================================= */ 70 71 72 73 74 /* @data AjPPatBYPNode ******************************************************** 75 ** 76 ** AJAX data structure for nodes in Baeza-Yates & Perleberg algorithm 77 ** 78 ** @attr next [struct AjSPatBYPNode*] Pointer to next node 79 ** @attr offset [ajint] Offset 80 ** @attr Padding [char[4]] Padding to alignment boundary 81 ** @@ 82 ******************************************************************************/ 83 84 typedef struct AjSPatBYPNode 85 { 86 struct AjSPatBYPNode *next; 87 ajint offset; 88 char Padding[4]; 89 } AjOPatBYPNode; 90 91 #define AjPPatBYPNode AjOPatBYPNode* 92 93 94 95 96 /* @data AjPPatComp *********************************************************** 97 ** 98 ** AJAX data structure that holds all needed data for compiling and 99 ** searching. Not including mismatch number. 100 ** 101 ** @alias AjSPatComp 102 ** @alias AjOPatComp 103 ** 104 ** @attr pattern [AjPStr] Prosite pattern string 105 ** @attr type [ajuint] Prosite pattern compile type 106 ** @attr plen [ajuint] Prosite pattern length 107 ** @attr buf [ajint*] Buffer for BMH search (can be -1) 108 ** @attr off [AjOPatBYPNode[AJALPHA]] Offset buffer for B-Y/P search 109 ** @attr sotable [ajuint*] Buffer for SHIFT-OR 110 ** @attr solimit [ajuint] Limit for BMH search 111 ** @attr m [ajuint] Real length of pattern (from embPatGetType) 112 ** @attr regex [AjPStr] PCRE regexp string 113 ** @attr skipm [ajuint**] Skip buffer for Tarhio-Ukkonen 114 ** @attr amino [AjBool] Must match left begin 115 ** @attr carboxyl [AjBool] Must match right 116 ** 117 ** @@ 118 ******************************************************************************/ 119 120 typedef struct AjSPatComp 121 { 122 AjPStr pattern; 123 ajuint type; 124 ajuint plen; 125 ajint* buf; 126 AjOPatBYPNode off[AJALPHA]; 127 ajuint* sotable; 128 ajuint solimit; 129 ajuint m; 130 AjPStr regex; 131 ajuint** skipm; 132 AjBool amino; 133 AjBool carboxyl; 134 } AjOPatComp; 135 136 #define AjPPatComp AjOPatComp* 137 138 139 140 141 /* @data AjPPatternSeq ******************************************************** 142 ** 143 ** Ajax sequence pattern object. 144 ** 145 ** Holds definition of feature pattern. Regular expression patterns ignore 146 ** mismatch value. 147 ** 148 ** @alias AjSPatternSeq 149 ** @alias AjOPatternSeq 150 ** 151 ** @attr Name [AjPStr] Name. 152 ** @attr Pattern [AjPStr] Pattern in string format. 153 ** @attr Compiled [void*] Compiled version of the pattern. 154 ** @attr Protein [AjBool] True if protein pattern 155 ** @attr Mismatch [ajuint] Mismatch value. 156 ** 157 ** @new ajPatternSeqNew Default constructor 158 ** @delete ajPatternSeqDel Default destructor 159 ** @use ajPatternSeqGetName Returns name. 160 ** @use ajPatternSeqGetPattern Returns pattern in string format. 161 ** @use ajPatternSeqGetCompiled Returns pointer to compiled pattern. 162 ** @use ajPatternSeqGetType Returns type as integer value. 163 ** @use ajPatternSeqGetMismatch Return mismatch value. 164 ** @@ 165 ******************************************************************************/ 166 167 typedef struct AjSPatternSeq 168 { 169 AjPStr Name; 170 AjPStr Pattern; 171 void* Compiled; 172 AjBool Protein; 173 ajuint Mismatch; 174 } AjOPatternSeq; 175 176 #define AjPPatternSeq AjOPatternSeq* 177 178 179 180 181 /* @data AjPPatternRegex ****************************************************** 182 ** 183 ** Ajax regular expression pattern object. 184 ** 185 ** Holds definition of feature pattern. Regular expression patterns ignore 186 ** mismatch value. 187 ** 188 ** @alias AjSPatternRegex 189 ** @alias AjOPatternRegex 190 ** 191 ** @attr Name [AjPStr] Name. 192 ** @attr Pattern [AjPStr] Pattern in string format. 193 ** @attr Compiled [AjPRegexp] Compiled version of the pattern. 194 ** @attr Type [ajuint] Type. 195 ** @attr Padding [char[4]] Padding to alignment boundary 196 ** 197 ** @new ajPatternRegexNew Default constructor 198 ** @delete ajPatternRegexDel Default destructor 199 ** @use ajPatternRegexGetName Returns name. 200 ** @use ajPatternRegexGetPattern Returns pattern in string format. 201 ** @use ajPatternRegexGetCompiled Returns pointer to compiled pattern. 202 ** @use ajPatternRegexGetType Returns type as integer value. 203 ** @@ 204 ******************************************************************************/ 205 206 typedef struct AjSPatternRegex 207 { 208 AjPStr Name; 209 AjPStr Pattern; 210 AjPRegexp Compiled; 211 ajuint Type; 212 char Padding[4]; 213 } AjOPatternRegex; 214 215 #define AjPPatternRegex AjOPatternRegex* 216 217 218 219 220 /* 221 ** type can be 0: string, 1: prosite (protein) 2: prosite like (nucleotide) 222 */ 223 #define AJ_PAT_TYPE_STRING 0 224 #define AJ_PAT_TYPE_PRO 1 225 #define AJ_PAT_TYPE_NUCL 2 226 227 228 229 230 /* @data AjPPatlistSeq ******************************************************** 231 ** 232 ** Ajax Pattern List object. 233 ** 234 ** Holds list of feature patterns and general information of them. 235 ** 236 ** @alias AjSPatlistSeq 237 ** @alias AjOPatlistSeq 238 ** 239 ** @attr Patlist [AjPList] List for patterns. 240 ** @attr Iter [AjIList] List iterator. 241 ** @attr Protein [AjBool] True if protein 242 ** @attr Padding [char[4]] Padding to alignment boundary 243 ** 244 ** @new ajPatlistNew Default constructor. 245 ** @delete ajPatlistDel Default destructor. 246 ** @modify ajPatlistRegexRead Reads the pattern file and fills the list. 247 ** @modify ajPatlistSeqRead Reads the pattern file and fills the list. 248 ** @modify ajPatlistRewind Restarts the iteration loop. 249 ** @modify ajPatlistAdd Adds new pattern into list. 250 ** @use ajPatlistGetNext Gets the next pattern from file and returns true if 251 ** available and false if not. 252 ** @@ 253 ******************************************************************************/ 254 255 typedef struct AjSPatlistSeq 256 { 257 AjPList Patlist; 258 AjIList Iter; 259 AjBool Protein; 260 char Padding[4]; 261 } AjOPatlistSeq; 262 263 #define AjPPatlistSeq AjOPatlistSeq* 264 265 266 267 268 /* @data AjPPatlistRegex ****************************************************** 269 ** 270 ** Ajax Pattern List object. 271 ** 272 ** Holds list of feature patterns and general information of them. 273 ** 274 ** @alias AjSPatlistRegex 275 ** @alias AjOPatlistRegex 276 ** 277 ** @attr Patlist [AjPList] List for patterns. 278 ** @attr Iter [AjIList] List iterator. 279 ** @attr Type [ajuint] Type of expression 280 ** @attr Padding [char[4]] Padding to alignment boundary 281 ** 282 ** @new ajPatlistNew Default constructor. 283 ** @delete ajPatlistDel Default destructor. 284 ** @modify ajPatlistRegexRead Reads the pattern file and fills the list. 285 ** @modify ajPatlistSeqRead Reads the pattern file and fills the list. 286 ** @modify ajPatlistRewind Restarts the iteration loop. 287 ** @modify ajPatlistAdd Adds new pattern into list. 288 ** @use ajPatlistGetNext Gets the next pattern from file and returns true if 289 ** available and false if not. 290 ** @@ 291 ******************************************************************************/ 292 293 typedef struct AjSPatlistRegex 294 { 295 AjPList Patlist; 296 AjIList Iter; 297 ajuint Type; 298 char Padding[4]; 299 } AjOPatlistRegex; 300 301 #define AjPPatlistRegex AjOPatlistRegex* 302 303 304 305 306 /* ========================================================================= */ 307 /* =========================== public functions ============================ */ 308 /* ========================================================================= */ 309 310 311 312 313 /* 314 ** Prototype definitions 315 */ 316 317 AjPPatternSeq ajPatternSeqNewList(AjPPatlistSeq plist, const AjPStr name, 318 const AjPStr pat, ajuint mismatch); 319 void ajPatternSeqDel(AjPPatternSeq* pthys); 320 const AjPStr ajPatternSeqGetName(const AjPPatternSeq thys); 321 const AjPStr ajPatternSeqGetPattern(const AjPPatternSeq thys); 322 AjPPatComp ajPatternSeqGetCompiled(const AjPPatternSeq thys); 323 AjBool ajPatternSeqGetProtein(const AjPPatternSeq thys); 324 ajuint ajPatternSeqGetMismatch(const AjPPatternSeq thys); 325 void ajPatternSeqSetCompiled(AjPPatternSeq thys, void* pat); 326 void ajPatternSeqDebug(const AjPPatternSeq pat); 327 328 AjPPatternRegex ajPatternRegexNewList(AjPPatlistRegex plist, 329 const AjPStr name, 330 const AjPStr pat); 331 void ajPatternRegexDel(AjPPatternRegex* pthys); 332 const AjPStr ajPatternRegexGetName(const AjPPatternRegex thys); 333 const AjPStr ajPatternRegexGetPattern(const AjPPatternRegex thys); 334 AjPRegexp ajPatternRegexGetCompiled(const AjPPatternRegex thys); 335 ajuint ajPatternRegexGetType(const AjPPatternRegex thys); 336 void ajPatternRegexSetCompiled(AjPPatternRegex thys, AjPRegexp pat); 337 void ajPatternRegexDebug(const AjPPatternRegex pat); 338 339 /* Patlist handling functions */ 340 AjPPatlistSeq ajPatlistSeqNewType(AjBool Protein); 341 AjPPatlistRegex ajPatlistRegexNewType(ajuint type); 342 AjPPatlistRegex ajPatlistRegexNew(void); 343 AjPPatlistSeq ajPatlistSeqNew(void); 344 void ajPatlistSeqDel(AjPPatlistSeq* pthys); 345 void ajPatlistRegexDel(AjPPatlistRegex* pthys); 346 AjPPatlistRegex ajPatlistRegexRead(const AjPStr patspec, 347 const AjPStr patname, 348 const AjPStr fmt, 349 ajuint type, AjBool upper, AjBool lower); 350 AjPPatlistSeq ajPatlistSeqRead(const AjPStr patspec, 351 const AjPStr patname, 352 const AjPStr fmt, 353 AjBool protein, ajuint mismatches); 354 AjBool ajPatlistRegexGetNext(AjPPatlistRegex thys, 355 AjPPatternRegex* pattern); 356 AjBool ajPatlistSeqGetNext(AjPPatlistSeq thys, 357 AjPPatternSeq* pattern); 358 void ajPatlistSeqRewind(AjPPatlistSeq thys); 359 void ajPatlistRegexRewind(AjPPatlistRegex thys); 360 void ajPatlistSeqRemoveCurrent(AjPPatlistSeq thys); 361 void ajPatlistRegexRemoveCurrent(AjPPatlistRegex thys); 362 void ajPatlistAddRegex(AjPPatlistRegex thys, AjPPatternRegex pat); 363 void ajPatlistAddSeq(AjPPatlistSeq thys, AjPPatternSeq pat); 364 ajuint ajPatlistSeqGetSize(const AjPPatlistSeq plist); 365 ajuint ajPatlistRegexGetSize(const AjPPatlistRegex plist); 366 ajuint ajPatlistRegexDoc(AjPPatlistRegex thys, AjPStr* pdoc); 367 ajuint ajPatlistSeqDoc(AjPPatlistSeq thys, AjPStr* pdoc); 368 369 AjPPatComp ajPatCompNew(void); 370 void ajPatCompDel(AjPPatComp* pthys); 371 ajuint ajPatternRegexType(const AjPStr type); 372 373 /* 374 ** End of prototype definitions 375 */ 376 377 378 379 380 AJ_END_DECLS 381 382 #endif /* !AJPAT_H */ 383