1 /* $Id: ncbisami.h,v 6.5 2006/05/10 20:47:13 camacho Exp $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * File Name: $RCSfile: ncbisami.h,v $ 27 * 28 * Author: Sergei Shavirin 29 * 30 * Initial Version Creation Date: 02/24/1997 31 * 32 * $Revision: 6.5 $ 33 * 34 * File Description: 35 * Internal include file for ISAM library 36 * 37 * $Log: ncbisami.h,v $ 38 * Revision 6.5 2006/05/10 20:47:13 camacho 39 * From Ilya Dondoshansky: Added sorting_done field to ISAMData to indicate that data is already sorted 40 * 41 * Revision 6.4 2002/01/18 18:53:13 madden 42 * Changes to research the last page if appropriate 43 * 44 * Revision 6.3 2000/07/18 19:29:28 shavirin 45 * Added new parameter test_non_unique to suppress check for non-unique 46 * strings ids in the database - default - TRUE. 47 * 48 * Revision 6.2 1999/08/25 20:19:24 shavirin 49 * Added parameter for user options to the internal structure. 50 * 51 * Revision 6.1 1999/02/19 22:01:59 madden 52 * Add NlmMFILEPtr to ISAMData typedef 53 * 54 * Revision 6.0 1997/08/25 18:53:32 madden 55 * Revision changed to 6.0 56 * 57 * Revision 1.6 1997/05/12 19:55:32 shavirin 58 * Some fixes type-changes to support ISAMCreateDatabase() API 59 * 60 * Revision 1.5 1997/05/07 21:14:18 shavirin 61 * Added definitions for fields array encoding and ISAMCreateDatabase() 62 * function. 63 * 64 * Revision 1.4 1997/05/06 21:36:50 shavirin 65 * Added definitions of functions for Codded Array compression 66 * implementation 67 * 68 * Revision 1.3 1997/05/05 18:17:35 shavirin 69 * Added support for platforms without memory mapping 70 * 71 * Revision 1.2 1997/05/01 17:26:16 shavirin 72 * Added String ISAM index functionality 73 * 74 * Revision 1.1 1997/02/24 21:07:17 shavirin 75 * Initial revision 76 * 77 * 78 * ========================================================================== 79 */ 80 81 #ifndef _NCBISAMI_H_ 82 #define _NCBISAMI_H_ ncbisami 83 84 /****************************************************************************/ 85 /* INCLUDES */ 86 /****************************************************************************/ 87 88 #include <ncbisam.h> 89 90 /****************************************************************************/ 91 /* INTERNAL FINCTION DEFINITIONS */ 92 /****************************************************************************/ 93 94 #ifdef __cplusplus 95 extern "C" { 96 #endif 97 98 #ifdef __cplusplus 99 } 100 #endif 101 /****************************************************************************/ 102 /* DEFINES */ 103 /****************************************************************************/ 104 105 #define MAX_FILENAME_LEN 256 106 #define LINE_SIZE_CHUNK 4096 107 #define BUFF_SIZE_CHUNK 1024 108 #define UID_NUM_CHUNK 1024 109 110 #define ISAM_VERSION 1 111 #define ISAM_DATA_CHAR '\2' 112 #define ENDS_ISAM_KEY(Ptr) ((*Ptr == NULLB) || (*Ptr == ISAM_DATA_CHAR) || (*Ptr == '\n') || (*Ptr == '\r')) 113 114 #define Log2(N) (log(N)/log(2.0)) 115 #define CA_TMP_CHUNK 4096 116 #define FA_Mask 0x7F 117 #define DEFAULT_CA_MAX_OFFSET 10000000 118 119 /****************************************************************************/ 120 /* TYPEDEFS */ 121 /****************************************************************************/ 122 123 typedef struct NISAMKeyData 124 { 125 Uint4 key; 126 Uint4 data; 127 } NISAMKeyData, PNTR NISAMKeyDataPtr; 128 129 typedef struct ISAMUidField 130 { 131 Uint4 uid; 132 Uint4 field; 133 } ISAMUidField, PNTR ISAMUidFieldPtr; 134 135 typedef struct ISAMData 136 { 137 Int4 type; /* Type of ISAM index */ 138 139 CharPtr DBFileName; /* Filename of database file */ 140 CharPtr IndexFileName; /* Filename of ISAM index file */ 141 142 CharPtr CAName; /* Common filename-directory for CA */ 143 CharPtr CADBExt; /* Extention for CA/FA files */ 144 CharPtr CAOffExt; /* Extention for CA-Offset files */ 145 Int4 CAMaxOffset; /* Offset for switch CA DB/Offset file */ 146 147 Nlm_MemMapPtr mmp; /* Memory map pointer to index file */ 148 NlmMFILEPtr mfp; /* Memory map pointer to database file for numeric search. */ 149 CharPtr FileStart; /* Pointer to index file if no memmap */ 150 Int4 NumTerms; /* Number of terms in database */ 151 Int4 NumSamples; /* Number of terms in ISAM index */ 152 Int4 PageSize; /* Page size of ISAM index */ 153 FILE *db_fd; /* File pointer of ISAM database */ 154 Boolean initialized; /* Is this structure was initialized 155 for ISAM Search ? */ 156 Uint4Ptr KeySamples; /* Pointer to first sample offset 157 in ISAM index */ 158 NISAMKeyDataPtr KeyDataSamples; /* Pointer to first NISAMKeyData structure 159 in ISAM index (for search with data) */ 160 161 CharPtr line; /* Temporary buffer to work with strings */ 162 Int4 max_line_size; /* Maximum string length in the database */ 163 Int4 idx_option; /* Options set by upper layer */ 164 Boolean test_non_unique; /* Check if data for String ISAM sorted */ 165 /* the following values are used to find gi's on a page that was recently searched. */ 166 NISAMKeyDataPtr lastKeyDataPage; /* last page searched. */ 167 Int4 first_gi, last_gi; /* first and last gi's of last page. */ 168 Int4 first, last; /* first and last offset's of last page. */ 169 Boolean sorting_done; /* Is data already sorted? */ 170 } ISAMData, PNTR ISAMDataPtr; 171 172 typedef struct ISAMTmpCA 173 { 174 Uint1Ptr buffer; /* Buffer with coded array bytes */ 175 Int4 allocated; /* Memory size allocated for the buffer */ 176 Int4 length; /* Final length of CA buffer */ 177 Int4 num_uids; /* Number of coded uids in CA */ 178 Int4 num_bits; /* Number of bits used for CA compression */ 179 Int4 byte_num; /* Temorary value for into CA buffer */ 180 Int4 bit_num; /* Temporary value for CA buffer */ 181 } ISAMTmpCA, PNTR ISAMTmpCAPtr; 182 183 static Uint4 PowersOfTwo[] = {01,02,04,010,020,040,0100,0200,0400,01000, 184 02000,04000,010000,020000,040000,0100000, 185 0200000,0400000,01000000,02000000, 186 04000000,010000000,020000000,040000000, 187 0100000000,0200000000,0400000000, 188 01000000000,02000000000, 189 04000000000,010000000000, 020000000000}; 190 191 static Uint1 OneBit[] = {0x80 , 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1}; 192 193 #endif 194 195 196 197 198