1 /* $Id: ncbisami.h,v 6.5 2006/05/10 20:47:13 camacho Exp $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  $RCSfile: ncbisami.h,v $
27 *
28 * Author:  Sergei Shavirin
29 *
30 * Initial Version Creation Date: 02/24/1997
31 *
32 * $Revision: 6.5 $
33 *
34 * File Description:
35 *         Internal include file for ISAM library
36 *
37 * $Log: ncbisami.h,v $
38 * Revision 6.5  2006/05/10 20:47:13  camacho
39 * From Ilya Dondoshansky: Added sorting_done field to ISAMData to indicate that data is already sorted
40 *
41 * Revision 6.4  2002/01/18 18:53:13  madden
42 * Changes to research the last page if appropriate
43 *
44 * Revision 6.3  2000/07/18 19:29:28  shavirin
45 * Added new parameter test_non_unique to suppress check for non-unique
46 * strings ids in the database - default - TRUE.
47 *
48 * Revision 6.2  1999/08/25 20:19:24  shavirin
49 * Added parameter for user options to the internal structure.
50 *
51 * Revision 6.1  1999/02/19 22:01:59  madden
52 * Add NlmMFILEPtr to ISAMData typedef
53 *
54 * Revision 6.0  1997/08/25 18:53:32  madden
55 * Revision changed to 6.0
56 *
57 * Revision 1.6  1997/05/12 19:55:32  shavirin
58 * Some fixes type-changes to support ISAMCreateDatabase() API
59 *
60 * Revision 1.5  1997/05/07 21:14:18  shavirin
61 * Added definitions for fields array encoding and ISAMCreateDatabase()
62 * function.
63 *
64 * Revision 1.4  1997/05/06 21:36:50  shavirin
65 * Added definitions of functions for Codded Array compression
66 * implementation
67 *
68  * Revision 1.3  1997/05/05  18:17:35  shavirin
69  * Added support for platforms without memory mapping
70  *
71  * Revision 1.2  1997/05/01  17:26:16  shavirin
72  * Added String ISAM index functionality
73  *
74  * Revision 1.1  1997/02/24  21:07:17  shavirin
75  * Initial revision
76  *
77 *
78 * ==========================================================================
79 */
80 
81 #ifndef _NCBISAMI_H_
82 #define _NCBISAMI_H_ ncbisami
83 
84 /****************************************************************************/
85 /* INCLUDES */
86 /****************************************************************************/
87 
88 #include <ncbisam.h>
89 
90 /****************************************************************************/
/* INTERNAL FUNCTION DEFINITIONS */
92 /****************************************************************************/
93 
94 #ifdef __cplusplus
95 extern "C" {
96 #endif
97 
98 #ifdef __cplusplus
99 }
100 #endif
101 /****************************************************************************/
102 /* DEFINES */
103 /****************************************************************************/
104 
/* Size limits and chunk sizes.  How each one is consumed is decided by the
   ISAM implementation, which is outside this header. */
#define MAX_FILENAME_LEN 256
#define LINE_SIZE_CHUNK  4096
#define BUFF_SIZE_CHUNK  1024
#define UID_NUM_CHUNK    1024

#define ISAM_VERSION 1
#define ISAM_DATA_CHAR '\2'

/* True when Ptr points at a character that terminates an ISAM key:
   NULLB, ISAM_DATA_CHAR, '\n' or '\r'.
   The argument is parenthesized so that expressions such as (p + 1) are
   dereferenced as a whole.  Ptr may be evaluated up to four times, so do
   not pass an expression with side effects (e.g. p++). */
#define ENDS_ISAM_KEY(Ptr) ((*(Ptr) == NULLB) || (*(Ptr) == ISAM_DATA_CHAR) || \
                            (*(Ptr) == '\n') || (*(Ptr) == '\r'))

/* Base-2 logarithm of N; relies on log() being declared where it is used. */
#define Log2(N) (log(N)/log(2.0))
#define CA_TMP_CHUNK 4096
#define FA_Mask 0x7F
#define DEFAULT_CA_MAX_OFFSET 10000000
118 
119 /****************************************************************************/
120 /* TYPEDEFS */
121 /****************************************************************************/
122 
123 typedef struct NISAMKeyData
124 {
125     Uint4 key;
126     Uint4 data;
127 } NISAMKeyData, PNTR NISAMKeyDataPtr;
128 
129 typedef struct ISAMUidField
130 {
131     Uint4 uid;
132     Uint4 field;
133 } ISAMUidField, PNTR ISAMUidFieldPtr;
134 
135 typedef struct ISAMData
136 {
137     Int4            type;           /* Type of ISAM index */
138 
139     CharPtr         DBFileName;     /* Filename of database file */
140     CharPtr         IndexFileName;  /* Filename of ISAM index file */
141 
142     CharPtr         CAName;         /* Common filename-directory for CA */
143     CharPtr         CADBExt;        /* Extention for CA/FA files */
144     CharPtr         CAOffExt;       /* Extention for CA-Offset files */
145     Int4            CAMaxOffset;    /* Offset for switch CA DB/Offset file */
146 
147     Nlm_MemMapPtr   mmp;            /* Memory map pointer to index file */
148     NlmMFILEPtr     mfp;            /* Memory map pointer to database file for numeric search. */
149     CharPtr         FileStart;      /* Pointer to index file if no memmap */
150     Int4            NumTerms;       /* Number of terms in database */
151     Int4            NumSamples;     /* Number of terms in ISAM index */
152     Int4            PageSize;       /* Page size of ISAM index */
153     FILE            *db_fd;         /* File pointer of ISAM database */
154     Boolean         initialized;    /* Is this structure was initialized
155                                        for ISAM Search ? */
156     Uint4Ptr        KeySamples;     /* Pointer to first sample offset
157                                        in ISAM index */
158     NISAMKeyDataPtr KeyDataSamples; /* Pointer to first NISAMKeyData structure
159                                        in ISAM index (for search with data) */
160 
161     CharPtr         line;           /* Temporary buffer to work with strings */
162     Int4            max_line_size;  /* Maximum string length in the database */
163     Int4            idx_option;    /* Options set by upper layer */
164     Boolean       test_non_unique; /* Check if data for String ISAM sorted */
165 	/* the following values are used to find gi's on a page that was recently searched. */
166     NISAMKeyDataPtr lastKeyDataPage;	/* last page searched. */
167     Int4 first_gi, last_gi;		/* first and last gi's of last page. */
168     Int4 first, last;		/* first and last offset's of last page. */
169     Boolean sorting_done; /* Is data already sorted? */
170 } ISAMData, PNTR ISAMDataPtr;
171 
172 typedef struct ISAMTmpCA
173 {
174     Uint1Ptr buffer;  /* Buffer with coded array bytes */
175     Int4 allocated;   /* Memory size allocated for the buffer */
176     Int4 length;      /* Final length of CA buffer */
177     Int4 num_uids;    /* Number of coded uids in CA */
178     Int4 num_bits;    /* Number of bits used for CA compression */
179     Int4 byte_num;    /* Temorary value for into CA buffer */
180     Int4 bit_num;     /* Temporary value for CA buffer */
181 } ISAMTmpCA, PNTR ISAMTmpCAPtr;
182 
183 static Uint4 PowersOfTwo[] = {01,02,04,010,020,040,0100,0200,0400,01000,
184                                02000,04000,010000,020000,040000,0100000,
185                                0200000,0400000,01000000,02000000,
186                                04000000,010000000,020000000,040000000,
187                                0100000000,0200000000,0400000000,
188                                01000000000,02000000000,
189                                04000000000,010000000000, 020000000000};
190 
191 static Uint1 OneBit[] = {0x80 , 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
192 
193 #endif
194 
195 
196 
197 
198