1 /* $Id: types.h 218286 2019-01-23 16:46:55Z twu $ */
2 #ifndef TYPES_INCLUDED
3 #define TYPES_INCLUDED
4 #ifdef HAVE_CONFIG_H
5 #include <config.h>		/* For SIZEOF_UNSIGNED_LONG_LONG, SIZEOF_UNSIGNED_LONG needed for HAVE_64_BIT */
6 #endif
7 
/* Width_T counts bits (for example index1part or basesize).  It is a
   signed type because negative values have to be representable. */
typedef signed int Width_T;

/* Blocksize_T counts entries (for example offsetscomp_blocksize). */
typedef unsigned Blocksize_T;
13 
14 
/* Fixed-width word types, built on the basic C integer types.  The
   width checks that follow the definitions turn a platform whose
   short/int sizes differ from the sizes promised here into a
   compile-time error instead of silently mis-sized words. */

/* A 2-byte word */
typedef unsigned short UINT2;

/* A 4-byte word (unsigned and signed) */
typedef unsigned int UINT4;
typedef int INT4;

/* Compile-time width checks.  A false condition yields an array type
   of size -1, which every C compiler must reject.  Written this way
   (rather than with _Static_assert) so that pre-C11 compilers can
   still process this header. */
typedef char UINT2_width_check_T[(sizeof(UINT2) == 2) ? 1 : -1];
typedef char UINT4_width_check_T[(sizeof(UINT4) == 4) ? 1 : -1];
typedef char INT4_width_check_T[(sizeof(INT4) == 4) ? 1 : -1];


/* Compressed representation of genome (high, low, flags).  Always
   UINT4.  Can think of as a genome block unit.  */
typedef UINT4 Genomecomp_T;


/* Can hold up to an 8-mer */
typedef UINT4 Localspace_T;
30 
31 /* An 8-byte word */
32 /* Oligospace_T needs to hold 1 more than maximum Storedoligomer_T.
33    If 8-byte words are not available, then maximum k-mer is 15 */
/* Prefer to use unsigned long long, which should be 8 bytes on all systems */
/* Step 1: choose the integer type behind Oligospace_T.  unsigned long
   long is tried first, then unsigned long.  If either is 8 bytes,
   HAVE_64_BIT is defined and UINT8 becomes available; otherwise
   Oligospace_T is unsigned int and OLIGOSPACE_NOT_LONG is defined. */
#if (SIZEOF_UNSIGNED_LONG_LONG == 8)
#define HAVE_64_BIT 1
typedef unsigned long long UINT8;
typedef unsigned long long Oligospace_T;

#elif (SIZEOF_UNSIGNED_LONG == 8)
#define HAVE_64_BIT 1
typedef unsigned long UINT8;
typedef unsigned long Oligospace_T;

#else
#define OLIGOSPACE_NOT_LONG
typedef unsigned int Oligospace_T;

#endif


/* Step 2: k-mer limit and nucleotide masks for the chosen word size.
   The condition repeats the size tests of step 1 instead of testing
   HAVE_64_BIT, so the outcome cannot be affected by a HAVE_64_BIT
   definition coming from anywhere else.

   LEFT_x holds the 2-bit code (A=0, C=1, G=2, T=3) in the two highest
   bits of the word; RIGHT_x holds it in the two lowest bits. */
#if (SIZEOF_UNSIGNED_LONG_LONG == 8) || (SIZEOF_UNSIGNED_LONG == 8)

#define MAXIMUM_KMER 18

/*               8765432187654321 */
#define LEFT_A 0x0000000000000000
#define LEFT_C 0x4000000000000000
#define LEFT_G 0x8000000000000000
#define LEFT_T 0xC000000000000000

/*                8765432187654321 */
#define RIGHT_A 0x0000000000000000
#define RIGHT_C 0x0000000000000001
#define RIGHT_G 0x0000000000000002
#define RIGHT_T 0x0000000000000003

#else

#define MAXIMUM_KMER 15

/*               87654321 */
#define LEFT_A 0x00000000
#define LEFT_C 0x40000000
#define LEFT_G 0x80000000
#define LEFT_T 0xC0000000

/*                87654321 */
#define RIGHT_A 0x00000000
#define RIGHT_C 0x00000001
#define RIGHT_G 0x00000002
#define RIGHT_T 0x00000003

#endif
91 
92 /* Contents of compressed offsets file.  Storing as UINT4, even for
93    large genomes, to reduce zero-padding of bitstreams.  For large
94    genomes, need to store 64-bit Positionsptr_T quantity in 2 UINT4
95    words. */
96 typedef UINT4 Offsetscomp_T;
97 
98 #if 0
99 /* Obsolete with handling of k-mers > 16.  Use Oligospace_T instead */
100 /* Holds a k-mer.  Can be UINT4 as long as k <= 16. */
101 /* Some procedures use Shortoligomer_T, which should be the same */
102 typedef UINT4 Storedoligomer_T;
103 #else
104 typedef UINT4 Shortoligomer_T;
105 #endif
106 
107 
108 /* Definitions */
109 /* Large genome: Genomic length > 2^32, needing 8-byte Univcoord_T */
110 /* Huge genome: Entries in positions file > 2^32, needing 8-byte Positionsptr_T */
111 
112 /* An offset into the positions file of an IndexDB.  For small genomes
113    < 2^32 bp such as human, need 3 billion divided by sampling
114    interval (default 3), requiring a maximum of 32 bits or 4 bytes
115    (Positionsptr_T).  For huge genomes or more frequent sampling,
116    need 8 bytes, or Hugepositionsptr_T. */
117 #ifdef HAVE_64_BIT
118 
119 #ifdef UTILITYP
120 typedef UINT8 Hugepositionsptr_T;
121 typedef UINT4 Positionsptr_T;
122 #elif defined(LARGE_GENOMES)
123 /* Don't really need offsets to be 8-byte unless we have a huge
124    genome, but this simplifies the code */
125 typedef UINT8 Hugepositionsptr_T;
126 typedef UINT8 Positionsptr_T;
127 #else
128 typedef UINT4 Positionsptr_T;
129 #endif
130 
131 #else
132 typedef UINT4 Positionsptr_T;
133 #endif
134 
135 
136 /* For definition of Chrpos_T, see genomicpos.h */
137 
138 /* Transcriptome expected to be a small genome */
139 typedef UINT4 Trcoord_T;
140 
141 /* For univintervals and Univ_IIT (chromosome_iit) files.  Use the largest word size allowable on the machine.  */
142 #ifdef HAVE_64_BIT
143 typedef UINT8 Univ_IIT_coord_T;
144 #else
145 typedef UINT4 Univ_IIT_coord_T;
146 #endif
147 
/* Splice site categories.  The explicit values are the same ones the
   compiler assigns implicitly (consecutive from 0 in this order). */
typedef enum {
  NO_SPLICE = 0,
  DONOR = 1,
  ANTIDONOR = 2,
  ACCEPTOR = 3,
  ANTIACCEPTOR = 4
} Splicetype_T;
149 
150 /* For splicetrie */
151 typedef UINT4 Trieoffset_T;
152 typedef UINT4 Triecontent_T;
153 
154 /* For suffix array */
155 /* typedef UINT4 Sarrayptr_T; */
156 
/* GMAP option values.  Each one is a different single bit.
   NOTE(review): presumably meant to be combined with bitwise OR --
   confirm against the callers. */
#define GMAP_IMPROVEMENT  1   /* bit 0 */
#define GMAP_ENDS         2   /* bit 1 */
#define GMAP_PAIRSEARCH   4   /* bit 2 */
160 
161 #endif
162