1 /* $Id: regiondbdef.h 222139 2020-03-13 00:15:01Z twu $ */
2 #ifndef REGIONDBDEF_INCLUDED
3 #define REGIONDBDEF_INCLUDED
4 #ifdef HAVE_CONFIG_H
5 #include <config.h>		/* For HAVE_PTHREAD */
6 #endif
7 
8 #include "genomicpos.h"
9 #include "access.h"
10 #include "types.h"
11 
12 #ifdef PMAP
13 #include "alphabet.h"
14 #endif
15 
16 
/* Marker for an invalid value (going by the name): -1 converted to
   Univcoord_T.  Univcoord_T is declared elsewhere; if it is an unsigned
   type, this is its maximum value.  The expansion is fully
   parenthesized so that it acts as a single operand in every context,
   e.g. "sizeof BADVAL" yields sizeof(Univcoord_T) rather than
   sizeof(Univcoord_T) - 1. */
#define BADVAL ((Univcoord_T) -1)
18 
/* Compression types.  Kept as preprocessor constants so that they remain
   usable in #if expressions. */
#define NO_COMPRESSION          0	/* data stored without compression */
#define BITPACK64_COMPRESSION   1	/* presumably 64-bit bit-packing, judging by the name */
22 
23 
24 /* A regiondb has a series of regional offsets and regional positions blocks.
25    It is designed to allow GSNAP to find 6-mers within a given genomic
26    region quickly.  Starting with the 2020 versions of this package,
27    the offsets and positions blocks are not compressed.
28    The fixed size of offsets and positions blocks allows them to be
29    located without any tables.
30 
   A region is 65536 bp (4^8 = 2^16) in the genome, and therefore covers
   65536 oligo start positions, where oligos are 6-mers.  A position
   within a region can be represented by an unsigned short with 16 bits,
   because 2^16 = 65536.  The regional offsets are a set of 4^6 pointers
   (one per distinct 6-mer) into the regional positions.  These offsets
   require 4^6 = 4096 unsigned shorts.
37 
38    To enhance memory access, the offsets and positions for a region
39    are placed next to each other, occupying 4096 + 65536 = 69632
40    unsigned shorts.
41 */
42 
43 
/* Length of one region: 2^16 = 65536.  The value is the same in both
   builds; only the integer suffix differs (U by default, ULL when
   LARGE_GENOMES is defined). */
#ifndef LARGE_GENOMES
#define REGION_LENGTH 65536U
#else
#define REGION_LENGTH 65536ULL
#endif
49 
50 
51 #define T Regiondb_T
52 struct T {
53 #ifdef PMAP
54   Alphabet_T alphabet;
55   int alphabet_size;
56 #endif
57 
58   Width_T region1part;		/* generally 6 */
59   Width_T region1interval;	/* always 1 */
60   size_t offsets_size;		/* e.g., 4^6 = 4096 */
61   size_t region_size;		/* offsets_size + REGION_LENGTH */
62 
63   Access_T regiondb_access;
64   int regiondb_shmid;
65   key_t regiondb_key;
66   int regiondb_fd;
67   size_t regiondb_len;
68   UINT2 *regiondb;		/* Values range from 0..65535 */
69 };
70 
71 #undef T
72 #endif
73 
74