1 /* $Id: regiondbdef.h 222139 2020-03-13 00:15:01Z twu $ */ 2 #ifndef REGIONDBDEF_INCLUDED 3 #define REGIONDBDEF_INCLUDED 4 #ifdef HAVE_CONFIG_H 5 #include <config.h> /* For HAVE_PTHREAD */ 6 #endif 7 8 #include "genomicpos.h" 9 #include "access.h" 10 #include "types.h" 11 12 #ifdef PMAP 13 #include "alphabet.h" 14 #endif 15 16 17 #define BADVAL (Univcoord_T) -1 18 19 /* Compression types */ 20 #define NO_COMPRESSION 0 21 #define BITPACK64_COMPRESSION 1 22 23 24 /* A regiondb has a series of regional offsets and regional positions blocks. 25 It is designed to allow GSNAP to find 6-mers within a given genomic 26 region quickly. Starting with the 2020 versions of this package, 27 the offsets and positions blocks are not compressed. 28 The fixed size of offsets and positions blocks allows them to be 29 located without any tables. 30 31 A region is 65536 bp in the genome, and covers 4^8 = 65536 32 distinct oligos, where oligos are 6-mers. The positions in a region 33 can be represented by an unsigned short with 16 bits, because 34 2^16 = 65536. The regional offsets are a set of 4^6 pointers into 35 the regional positions. These offsets require 4^6 = 4096 unsigned 36 shorts. 37 38 To enhance memory access, the offsets and positions for a region 39 are placed next to each other, occupying 4096 + 65536 = 69632 40 unsigned shorts. 41 */ 42 43 44 #ifdef LARGE_GENOMES 45 #define REGION_LENGTH 65536ULL /* 2^16 */ 46 #else 47 #define REGION_LENGTH 65536U /* 2^16 */ 48 #endif 49 50 51 #define T Regiondb_T 52 struct T { 53 #ifdef PMAP 54 Alphabet_T alphabet; 55 int alphabet_size; 56 #endif 57 58 Width_T region1part; /* generally 6 */ 59 Width_T region1interval; /* always 1 */ 60 size_t offsets_size; /* e.g., 4^6 = 4096 */ 61 size_t region_size; /* offsets_size + REGION_LENGTH */ 62 63 Access_T regiondb_access; 64 int regiondb_shmid; 65 key_t regiondb_key; 66 int regiondb_fd; 67 size_t regiondb_len; 68 UINT2 *regiondb; /* Values range from 0..65535 */ 69 }; 70 71 #undef T 72 #endif 73 74