/* $Id: types.h 218286 2019-01-23 16:46:55Z twu $ */
#ifndef TYPES_INCLUDED
#define TYPES_INCLUDED
#ifdef HAVE_CONFIG_H
#include <config.h>		/* For SIZEOF_UNSIGNED_LONG_LONG, SIZEOF_UNSIGNED_LONG needed for HAVE_64_BIT */
#endif

/* Basic integer typedefs and word-size-dependent constants.

   The word size is selected below from SIZEOF_UNSIGNED_LONG_LONG and
   SIZEOF_UNSIGNED_LONG.  If neither macro is defined when this header
   is read, both evaluate to 0 inside #if and the 32-bit fallback
   branch (MAXIMUM_KMER 15, no HAVE_64_BIT, no UINT8) is used. */

/* Number of bits, such as index1part or basesize.  Need to allow for negative values. */
typedef int Width_T;

/* Number of entries, such as offsetscomp_blocksize */
typedef unsigned int Blocksize_T;


/* A 2-byte word */
typedef unsigned short UINT2;

/* A 4-byte word */
typedef unsigned int UINT4;
typedef int INT4;

/* Compile-time checks that the names above really have the advertised
   sizes.  An array type with a negative size is invalid, so a mismatch
   stops the build instead of silently producing wrong-width words.
   Written as typedefs so they also work on compilers that lack
   _Static_assert. */
typedef char Types_UINT2_must_be_2_bytes[(sizeof(UINT2) == 2) ? 1 : -1];
typedef char Types_UINT4_must_be_4_bytes[(sizeof(UINT4) == 4) ? 1 : -1];
typedef char Types_INT4_must_be_4_bytes[(sizeof(INT4) == 4) ? 1 : -1];


/* Compressed representation of genome (high, low, flags).  Always
   UINT4.  Can think of as a genome block unit. */
typedef UINT4 Genomecomp_T;


/* Can hold up to an 8-mer */
typedef UINT4 Localspace_T;

/* An 8-byte word */
/* Oligospace_T needs to hold 1 more than maximum Storedoligomer_T.
   If 8-byte words are not available, then maximum k-mer is 15 */
/* Prefer to use unsigned long long, which should be 8 bytes on all systems */
/* The LEFT_x and RIGHT_x masks carry an integer suffix so that each
   constant has exactly the type Oligospace_T in its branch.  Without
   the suffix, the small values would have type int (and some of the
   large ones a signed type), which makes shifts and comparisons on
   them behave differently from the Oligospace_T values they are
   combined with. */
#if (SIZEOF_UNSIGNED_LONG_LONG == 8)
#define HAVE_64_BIT 1
#define MAXIMUM_KMER 18
typedef unsigned long long UINT8;
typedef unsigned long long Oligospace_T;

/*               8765432187654321 */
#define LEFT_A 0x0000000000000000ULL
#define LEFT_C 0x4000000000000000ULL
#define LEFT_G 0x8000000000000000ULL
#define LEFT_T 0xC000000000000000ULL

/*                8765432187654321 */
#define RIGHT_A 0x0000000000000000ULL
#define RIGHT_C 0x0000000000000001ULL
#define RIGHT_G 0x0000000000000002ULL
#define RIGHT_T 0x0000000000000003ULL


#elif (SIZEOF_UNSIGNED_LONG == 8)
#define HAVE_64_BIT 1
#define MAXIMUM_KMER 18
typedef unsigned long UINT8;
typedef unsigned long Oligospace_T;

/*               8765432187654321 */
#define LEFT_A 0x0000000000000000UL
#define LEFT_C 0x4000000000000000UL
#define LEFT_G 0x8000000000000000UL
#define LEFT_T 0xC000000000000000UL

/*                8765432187654321 */
#define RIGHT_A 0x0000000000000000UL
#define RIGHT_C 0x0000000000000001UL
#define RIGHT_G 0x0000000000000002UL
#define RIGHT_T 0x0000000000000003UL


#else
#define MAXIMUM_KMER 15
#define OLIGOSPACE_NOT_LONG
typedef unsigned int Oligospace_T;

/*               87654321 */
#define LEFT_A 0x00000000U
#define LEFT_C 0x40000000U
#define LEFT_G 0x80000000U
#define LEFT_T 0xC0000000U

/*                87654321 */
#define RIGHT_A 0x00000000U
#define RIGHT_C 0x00000001U
#define RIGHT_G 0x00000002U
#define RIGHT_T 0x00000003U

#endif

#ifdef HAVE_64_BIT
/* Same style of compile-time size check as for UINT2/UINT4: the
   SIZEOF_ macros tested above must agree with the compiler in use. */
typedef char Types_UINT8_must_be_8_bytes[(sizeof(UINT8) == 8) ? 1 : -1];
#endif

/* Contents of compressed offsets file.  Storing as UINT4, even for
   large genomes, to reduce zero-padding of bitstreams.  For large
   genomes, need to store 64-bit Positionsptr_T quantity in 2 UINT4
   words. */
typedef UINT4 Offsetscomp_T;

#if 0
/* Obsolete with handling of k-mers > 16.  Use Oligospace_T instead */
/* Holds a k-mer.  Can be UINT4 as long as k <= 16. */
/* Some procedures use Shortoligomer_T, which should be the same */
typedef UINT4 Storedoligomer_T;
#else
typedef UINT4 Shortoligomer_T;
#endif


/* Definitions */
/* Large genome: Genomic length > 2^32, needing 8-byte Univcoord_T */
/* Huge genome: Entries in positions file > 2^32, needing 8-byte Positionsptr_T */

/* An offset into the positions file of an IndexDB.  For small genomes
   < 2^32 bp such as human, need 3 billion divided by sampling
   interval (default 3), requiring a maximum of 32 bits or 4 bytes
   (Positionsptr_T).  For huge genomes or more frequent sampling,
   need 8 bytes, or Hugepositionsptr_T. */
#ifdef HAVE_64_BIT

#ifdef UTILITYP
typedef UINT8 Hugepositionsptr_T;
typedef UINT4 Positionsptr_T;
#elif defined(LARGE_GENOMES)
/* Don't really need offsets to be 8-byte unless we have a huge
   genome, but this simplifies the code */
typedef UINT8 Hugepositionsptr_T;
typedef UINT8 Positionsptr_T;
#else
/* Hugepositionsptr_T is not defined in this configuration */
typedef UINT4 Positionsptr_T;
#endif

#else
/* No 8-byte word available, so Hugepositionsptr_T is not defined */
typedef UINT4 Positionsptr_T;
#endif


/* For definition of Chrpos_T, see genomicpos.h */

/* Transcriptome expected to be a small genome */
typedef UINT4 Trcoord_T;

/* For univintervals and Univ_IIT (chromosome_iit) files.  Use the largest word size allowable on the machine. */
#ifdef HAVE_64_BIT
typedef UINT8 Univ_IIT_coord_T;
#else
typedef UINT4 Univ_IIT_coord_T;
#endif

typedef enum {NO_SPLICE, DONOR, ANTIDONOR, ACCEPTOR, ANTIACCEPTOR} Splicetype_T;

/* For splicetrie */
typedef UINT4 Trieoffset_T;
typedef UINT4 Triecontent_T;

/* For suffix array */
/* typedef UINT4 Sarrayptr_T; */

/* Distinct single-bit values (1, 2, 4), so they can be OR-ed together */
#define GMAP_IMPROVEMENT 1
#define GMAP_ENDS 2
#define GMAP_PAIRSEARCH 4

#endif