1 /* @include embindex ********************************************************** 2 ** 3 ** B+ Tree Indexing plus Disc Cache. 4 ** 5 ** @author Copyright (c) 2003 Alan Bleasby 6 ** @version $Revision: 1.34 $ 7 ** @modified $Date: 2012/05/24 16:57:10 $ by $Author: rice $ 8 ** @@ 9 ** 10 ** This library is free software; you can redistribute it and/or 11 ** modify it under the terms of the GNU Lesser General Public 12 ** License as published by the Free Software Foundation; either 13 ** version 2.1 of the License, or (at your option) any later version. 14 ** 15 ** This library is distributed in the hope that it will be useful, 16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 ** Lesser General Public License for more details. 19 ** 20 ** You should have received a copy of the GNU Lesser General Public 21 ** License along with this library; if not, write to the Free Software 22 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 23 ** MA 02110-1301, USA. 24 ** 25 ******************************************************************************/ 26 27 #ifndef EMBINDEX_H 28 #define EMBINDEX_H 29 30 31 32 /* ========================================================================= */ 33 /* ============================= include files ============================= */ 34 /* ========================================================================= */ 35 36 #include "ajdefine.h" 37 #include "ajstr.h" 38 #include "ajlist.h" 39 #include "ajindex.h" 40 #include "ajreg.h" 41 42 AJ_BEGIN_DECLS 43 44 45 46 47 /* ========================================================================= */ 48 /* =============================== constants =============================== */ 49 /* ========================================================================= */ 50 51 52 53 54 /* ========================================================================= */ 55 /* ============================== public data ============================== */ 56 /* ========================================================================= */ 57 58 59 60 61 /* @data EmbPBtreeEntry ******************************************************* 62 ** 63 ** Index tree entries 64 ** 65 ** @alias EmbOBtreeEntry 66 ** @alias EmbSBtreeEntry 67 ** 68 ** @attr dbname [AjPStr] Database name 69 ** @attr dbrs [AjPStr] Index resource definition 70 ** @attr release [AjPStr] Release number 71 ** @attr date [AjPStr] Release date 72 ** @attr dbtype [AjPStr] Database type 73 ** @attr directory [AjPStr] Database directory 74 ** @attr idirectory [AjPStr] Index directory 75 ** @attr idextension [AjPStr] Id index extension 76 ** @attr maxid [AjPStr] Longest id in data 77 ** @attr files [AjPList] List of data filenames 78 ** @attr reffiles [AjPList*] Lists of data reference filenames 79 ** @attr fields [AjPList] EMBOSS index field structures 80 ** @attr id [AjPStr] Entry identifier 81 ** @attr idcache [AjPBtcache] Id cache structure 82 ** @attr pripagecount [ajlong] Cache primary page count 83 ** @attr secpagecount [ajlong] Cache secondary page count 84 ** @attr do_id [AjBool] If true, build id index 85 ** @attr compressed [AjBool] If true, compress id index 86 ** @attr nfiles [ajuint] Data file count 87 ** @attr refcount [ajuint] Reference file(s) for each entry 88 ** @attr idlen [ajuint] Maximum id length in index 89 ** @attr idmaxlen [ajuint] Maximum id length in data 90 ** @attr idtruncate [ajuint] Number of ids truncated 91 ** @attr pripagesize [ajuint] Default page size 92 ** @attr pricachesize [ajuint] Defalt cache size 93 ** @attr idorder [ajuint] Id index primary order 94 ** @attr idfill [ajuint] Id index primary fill count 95 ** @attr secpagesize [ajuint] Default page size 96 ** @attr seccachesize [ajuint] Defalt cache size 97 ** @attr idsecorder [ajuint] Id index secondary order 98 ** @attr idsecfill [ajuint] Id index secondary fill count 99 ** @attr fpos [ajlong] Input file position 100 ** @attr reffpos [ajlong*] Input extra (reference) file positions 101 ******************************************************************************/ 102 103 typedef struct EmbSBtreeEntry 104 { 105 AjPStr dbname; 106 AjPStr dbrs; 107 AjPStr release; 108 AjPStr date; 109 AjPStr dbtype; 110 111 AjPStr directory; 112 AjPStr idirectory; 113 AjPStr idextension; 114 AjPStr maxid; 115 116 AjPList files; 117 AjPList *reffiles; 118 AjPList fields; 119 120 AjPStr id; 121 AjPBtcache idcache; 122 ajlong pripagecount; 123 ajlong secpagecount; 124 125 AjBool do_id; 126 AjBool compressed; 127 128 ajuint nfiles; 129 130 ajuint refcount; 131 132 ajuint idlen; 133 ajuint idmaxlen; 134 ajuint idtruncate; 135 136 ajuint pripagesize; 137 ajuint pricachesize; 138 139 ajuint idorder; 140 ajuint idfill; 141 142 ajuint secpagesize; 143 ajuint seccachesize; 144 145 ajuint idsecorder; 146 ajuint idsecfill; 147 148 149 ajlong fpos; 150 ajlong *reffpos; 151 152 } EmbOBtreeEntry; 153 #define EmbPBtreeEntry EmbOBtreeEntry* 154 155 156 /* @data EmbPBtreeField ******************************************************* 157 ** 158 ** Index tree entries 159 ** 160 ** @alias EmbSBtreeField 161 ** @alias EmbOBtreeField 162 ** 163 ** @attr cache [AjPBtcache] Cache structure 164 ** @attr data [AjPList] Keywords to index 165 ** @attr name [AjPStr] File basename 166 ** @attr extension [AjPStr] File extension 167 ** @attr maxkey [AjPStr] Longest keyword found 168 ** @attr freelist [AjPStr*] Free data elements for reuse 169 ** @attr pripagecount [ajulong] Index primary page count 170 ** @attr secpagecount [ajulong] Index secondary page count 171 ** @attr pripagesize [ajuint] Index primary page size 172 ** @attr secpagesize [ajuint] Index secondary page size 173 ** @attr pricachesize [ajuint] Index primary cache size 174 ** @attr seccachesize [ajuint] Index secondary cache size 175 ** @attr order [ajuint] Primary page order 176 ** @attr fill [ajuint] Primary page fill count 177 ** @attr secorder [ajuint] Secondary page order 178 ** @attr secfill [ajuint] Secondary page fill count 179 ** @attr refcount [ajuint] Number of reference file(s) per entry 180 ** @attr len [ajuint] Maximum keyword length in index 181 ** @attr idlen [ajuint] Maximum id length in index 182 ** @attr maxlen [ajuint] Maximum keyword length in data 183 ** @attr truncate [ajuint] Number of keywords truncated 184 ** @attr freecount [ajuint] Free list used 185 ** @attr freesize [ajuint] Free list size 186 ** @attr secondary [AjBool] Secondary index if true 187 ** @attr compressed [AjBool] Compress index if true 188 ** @attr Padding [char[4]] Padding to alignment boundary 189 ******************************************************************************/ 190 191 typedef struct EmbSBtreeField 192 { 193 AjPBtcache cache; 194 AjPList data; 195 AjPStr name; 196 AjPStr extension; 197 AjPStr maxkey; 198 AjPStr *freelist; 199 ajulong pripagecount; 200 ajulong secpagecount; 201 ajuint pripagesize; 202 ajuint secpagesize; 203 ajuint pricachesize; 204 ajuint seccachesize; 205 ajuint order; 206 ajuint fill; 207 ajuint secorder; 208 ajuint secfill; 209 ajuint refcount; 210 ajuint len; 211 ajuint idlen; 212 ajuint maxlen; 213 ajuint truncate; 214 ajuint freecount; 215 ajuint freesize; 216 AjBool secondary; 217 AjBool compressed; 218 char Padding[4]; 219 } EmbOBtreeField; 220 #define EmbPBtreeField EmbOBtreeField* 221 222 /* ========================================================================= */ 223 /* =========================== public functions ============================ */ 224 /* ========================================================================= */ 225 226 227 228 /* 229 ** Prototype definitions 230 */ 231 232 void embBtreeIndexEntry(EmbPBtreeEntry entry, 233 ajuint dbno); 234 void embBtreeIndexField(EmbPBtreeField field, 235 const EmbPBtreeEntry entry, 236 ajuint dbno); 237 ajuint embBtreeIndexPrimary(EmbPBtreeField field, 238 const EmbPBtreeEntry entry, 239 ajuint dbno); 240 ajuint embBtreeIndexSecondary(EmbPBtreeField field, 241 const EmbPBtreeEntry entry); 242 243 void embBtreeFindEmblAc(const AjPStr readline, EmbPBtreeField field, 244 AjPStr *Pstr); 245 void embBtreeParseEmblAc(const AjPStr readline, EmbPBtreeField field); 246 void embBtreeParseEmblDe(const AjPStr readline, EmbPBtreeField field); 247 void embBtreeParseEmblKw(const AjPStr readline, EmbPBtreeField field); 248 void embBtreeParseEmblSv(const AjPStr readline, EmbPBtreeField field); 249 void embBtreeParseEmblTx(const AjPStr readline, EmbPBtreeField field); 250 void embBtreeParseFastaAc(const AjPStr readline, EmbPBtreeField field); 251 void embBtreeParseFastaDe(const AjPStr readline, EmbPBtreeField field); 252 void embBtreeParseFastaSv(const AjPStr readline, EmbPBtreeField field); 253 void embBtreeParseGenbankAc(const AjPStr readline, EmbPBtreeField field); 254 void embBtreeParseGenbankDe(const AjPStr readline, EmbPBtreeField field); 255 void embBtreeParseGenbankKw(const AjPStr readline, EmbPBtreeField field); 256 void embBtreeParseGenbankTx(const AjPStr readline, EmbPBtreeField field); 257 258 void embBtreeParseEntry(const AjPStr readline, AjPRegexp regexp, 259 EmbPBtreeEntry entry); 260 void embBtreeParseField(const AjPStr readline, AjPRegexp regexp, 261 EmbPBtreeField field); 262 void embBtreeParseFieldSecond(const AjPStr readline, AjPRegexp regexp, 263 EmbPBtreeField field); 264 void embBtreeParseFieldThird(const AjPStr readline, AjPRegexp regexp, 265 EmbPBtreeField field); 266 void embBtreeParseFieldTrim(const AjPStr readline, AjPRegexp regexp, 267 EmbPBtreeField field); 268 void embBtreeReportEntry(AjPFile outf, const EmbPBtreeEntry entry); 269 void embBtreeReportField(AjPFile outf, const EmbPBtreeField field); 270 void embBtreeEmblAC(const AjPStr acline, AjPList aclist); 271 void embBtreeEmblKW(const AjPStr kwline, AjPList kwlist, ajuint maxlen); 272 void embBtreeEmblDE(const AjPStr deline, AjPList delist, ajuint maxlen); 273 void embBtreeEmblSV(const AjPStr idline, AjPList svlist); 274 void embBtreeEmblTX(const AjPStr kwline, AjPList kwlist, ajuint maxlen); 275 void embBtreeGenBankAC(const AjPStr acline, AjPList aclist); 276 void embBtreeGenBankKW(const AjPStr kwline, AjPList kwlist, ajuint maxlen); 277 void embBtreeGenBankDE(const AjPStr kwline, AjPList kwlist, ajuint maxlen); 278 void embBtreeGenBankTX(const AjPStr kwline, AjPList kwlist, ajuint maxlen); 279 280 void embBtreeFastaDE(const AjPStr kwline, AjPList kwlist, ajuint maxlen); 281 void embBtreeFastaSV(const AjPStr kwline, AjPList kwlist, ajuint maxlen); 282 283 284 ajuint embBtreeReadDir(AjPStr **filelist, const AjPStr fdirectory, 285 const AjPStr files, const AjPStr exclude); 286 EmbPBtreeEntry embBtreeEntryNew(ajuint refcount); 287 void embBtreeEntrySetCompressed(EmbPBtreeEntry entry); 288 ajuint embBtreeSetFields(EmbPBtreeEntry entry, AjPStr const * fields); 289 void embBtreeEntryDel(EmbPBtreeEntry *thys); 290 void embBtreeSetDbInfo(EmbPBtreeEntry entry, const AjPStr name, 291 const AjPStr dbrs, 292 const AjPStr date, const AjPStr release, 293 const AjPStr type, const AjPStr directory, 294 const AjPStr idirectory); 295 ajuint embBtreeGetFiles(EmbPBtreeEntry entry, const AjPStr fdirectory, 296 const AjPStr files, const AjPStr exclude); 297 AjBool embBtreeWriteEntryFile(const EmbPBtreeEntry entry); 298 void embBtreeGetRsInfo(EmbPBtreeEntry entry); 299 AjBool embBtreeOpenCaches(EmbPBtreeEntry entry); 300 AjBool embBtreeCloseCaches(EmbPBtreeEntry entry); 301 AjBool embBtreeDumpParameters(EmbPBtreeEntry entry); 302 303 EmbPBtreeField embBtreeFieldNewC(const char* nametxt); 304 EmbPBtreeField embBtreeFieldNewS(const AjPStr name, ajuint refcount); 305 void embBtreeFieldDel(EmbPBtreeField *Pthis); 306 AjBool embBtreeFieldGetdataS(EmbPBtreeField field, AjPStr *Pstr); 307 void embBtreeFieldSetCompressed(EmbPBtreeField field); 308 void embBtreeFieldSetSecondary(EmbPBtreeField field); 309 void embBtreeFieldSetIdtype(EmbPBtreeField field); 310 EmbPBtreeField embBtreeGetFieldC(EmbPBtreeEntry entry, const char * nametxt); 311 EmbPBtreeField embBtreeGetFieldS(EmbPBtreeEntry entry, const AjPStr name); 312 313 void embIndexExit(void); 314 315 /* 316 ** End of prototype definitions 317 */ 318 319 #if 0 320 AjBool embBtreeProbeCaches(EmbPBtreeEntry entry); 321 #endif 322 323 324 AJ_END_DECLS 325 326 #endif /* !EMBINDEX_H */ 327 328