1 /***************************************************************
2 
3    The Subread software package is free software package:
4    you can redistribute it and/or modify it under the terms
5    of the GNU General Public License as published by the
6    Free Software Foundation, either version 3 of the License,
7    or (at your option) any later version.
8 
9    Subread is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty
11    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 
13    See the GNU General Public License for more details.
14 
15    Authors: Drs Yang Liao and Wei Shi
16 
17   ***************************************************************/
18 
19 
20 #ifndef _SUBREAD_H_
21 #define _SUBREAD_H_
22 
23 #include <stdlib.h>
24 #include <pthread.h>
25 #include <stdio.h>
26 #include <zlib.h>
27 
28 #ifndef MAKE_STANDALONE
29 #ifndef RUNNING_ENV
30 #include <R.h>
31 #endif
32 #endif
33 
34 #include "hashtable.h"
35 
/*
 * Single-cell RNA-seq input settings: the maximum number of FASTQ files and
 * two separator tokens (presumably used to join several file names into one
 * string -- confirm against the callers).
 */
#define MAX_SCRNA_FASTQ_FILES   256
#define SCRNA_FASTA_SPLIT1      "|Rsd:cCounts:mFQs|"
#define SCRNA_FASTA_SPLIT2      "|Rsd:cCounts:1mFQ|"
39 
40 
/* Bit masks for the FLAG column of a SAM/BAM record, listed by bit value. */
#define SAM_FLAG_PAIRED_TASK                  0x0001
#define SAM_FLAG_MATCHED_IN_PAIR              0x0002
#define SAM_FLAG_UNMAPPED                     0x0004
#define SAM_FLAG_MATE_UNMATCHED               0x0008
#define SAM_FLAG_REVERSE_STRAND_MATCHED       0x0010
#define SAM_FLAG_MATE_REVERSE_STRAND_MATCHED  0x0020
#define SAM_FLAG_FIRST_READ_IN_PAIR           0x0040
#define SAM_FLAG_SECOND_READ_IN_PAIR          0x0080
#define SAM_FLAG_SECONDARY_MAPPING            0x0100
#define SAM_FLAG_DUPLICATE                    0x0400
51 
/* Largest values representable in 64-bit unsigned / signed long long. */
#define SUBREAD_MAX_ULONGLONG  0xffffffffffffffffULL
#define SUBREAD_MAX_LONGLONG   0x7fffffffffffffffLL

/* Kinds of junction events, in ascending numeric order. */
#define SPLICING_JUNCTION   0
#define FUSION_JUNCTION     1
#define FUSION_BREAK_POINT  2

/* Run stage identifiers. */
#define RUN_ALIGN  0
#define RUN_FINAL  1
61 
62 
63 
/* Upper bounds on thread counts and on events per read (as the names suggest). */
#define MAX_THREADS         40
#define FC_MAX_THREADS      64
#define MAX_EVENTS_IN_READ  8

/* Capacities for reads, names, paths and barcodes. */
#define MAX_READ_LENGTH          (1210)
#define MAX_READ_NAME_LEN        200
#define MAX_CHROMOSOME_NAME_LEN  200
#define MAX_FILE_NAME_LENGTH     (1000)
#define FEATURE_NAME_LENGTH      256
#define INPUT_BLC_MAX_READS      20
#define MAX_BARCODE_LEN          32

/* Integer arithmetic evaluated left to right: 4096 * 3 / 5 * 3 == 7371. */
#define MULTI_THREAD_OUTPUT_ITEMS      (4096 * 3 / 5 * 3)
#define EXON_LONG_READ_LENGTH          160
#define EXON_MAX_CIGAR_LEN             256
#define FC_CIGAR_PARSER_ITEMS          11
/* 8 MiB. */
#define FC_LONG_READ_RECORD_HARDLIMIT  (8 * 1024 * 1024)

/* Indel limits. */
#define MAX_INDEL_SECTIONS    7
#define MAX_INSERTION_LENGTH  200
#define MAX_DELETION_LENGTH   1000

/* Switches and constants that are currently disabled: */
//#define XBIG_MARGIN_RECORD_SIZE 24
//#define BASE_BLOCK_LENGTH 15000000
//#define NEED_SUBREAD_STATISTIC
90 
91 
/*
 * Per-read status bits, listed by numeric value.
 *
 * Two values are not single independent bits:
 *   - IS_MAX_POS_NEGATIVE_STRAND (0x000C) has bits 0x0004 and 0x0008 set, so
 *     it overlaps IS_MIN_POS_NEGATIVE_STRAND.
 *   - IS_R1R2_EQUAL_LEN has the same value as IS_FUSION.
 */
#define IS_R1_CLOSE_TO_5                  0x0001
#define IS_REVERSED_HALVES                0x0002
#define IS_MIN_POS_NEGATIVE_STRAND        0x0004
#define IS_MAX_POS_NEGATIVE_STRAND        0x000C
#define IS_PAIRED_HINTED                  0x0010
#define IS_PROCESSED_READ                 0x0020
#define IS_PROCESSED_READ_R2              0x0040
#define IS_PAIRED_MATCH                   0x0080
#define IS_NEGATIVE_STRAND_R1             0x0100
#define IS_NEGATIVE_STRAND_R2             0x0200
#define IS_FUSION                         0x0400
#define IS_R1R2_EQUAL_LEN                 0x0400
#define IS_NEGATIVE_STRAND                0x0800
#define IS_RECOVERED_JUNCTION_READ        0x1000
#define IS_FINALISED_PROCESSING           0x2000
#define IS_RECOVERED_JUNCTION_READ_STEP4  0x4000
#define IS_BREAKEVEN_READ                 0x8000
109 
/* Defined unconditionally, so the mutex-based branch below is always taken. */
#define USE_POSIX_MUTEX_LOCK

/*
 * subread_lock_t is the lock type used throughout the package.  Spinlocks are
 * used only when none of the listed platforms / switches is in effect;
 * otherwise the pthread_spin_* names are redirected to their pthread_mutex_*
 * counterparts.
 */
#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__DragonFly__) && !defined(USE_POSIX_MUTEX_LOCK)
typedef pthread_spinlock_t subread_lock_t;
#else
typedef pthread_mutex_t subread_lock_t;
#define pthread_spinlock_t       pthread_mutex_t
#define pthread_spin_init(a, b)  pthread_mutex_init(a, NULL)
#define pthread_spin_destroy(a)  pthread_mutex_destroy(a)
#define pthread_spin_lock        pthread_mutex_lock
#define pthread_spin_unlock      pthread_mutex_unlock
/* NOTE(review): this replacement ignores the length bound `l`; the string
 * must therefore be NUL-terminated.  Confirm that every caller guarantees it. */
#define strnlen(a,l) strlen(a)
#endif
123 
/* 64-bit integer aliases; guarded so that another header may define them first. */
#if !defined(SRINT_64_DEFINED)
#define SRINT_64_DEFINED
typedef long long           srInt_64;
typedef unsigned long long  srUInt_64;
#endif

/* On MinGW, map ftello/fseeko to the 64-bit variants. */
#if defined(__MINGW32__)
#define ftello ftello64
#define fseeko fseeko64
#endif
134 
/*
 * Console-output and early-exit shims.
 *
 * When MAKE_STANDALONE or RUNNING_ENV is defined, messages go straight to
 * stderr and STANDALONE_exit() terminates the process.  Otherwise messages are
 * routed through msgqu_printf() and STANDALONE_exit() returns from the
 * calling function.
 *
 * All macros expand to a single expression/statement WITHOUT a trailing
 * semicolon, so both variants can be used as `STANDALONE_exit(1);` inside an
 * unbraced if/else.
 */
#if defined(MAKE_STANDALONE) || defined(RUNNING_ENV)
#define STANDALONE_exit(i) exit(i)
#define SUBREADprintf(...) fprintf(stderr, __VA_ARGS__)
#define SUBREADputs(x) fprintf(stderr, "%s\n", (x))
#define SUBREADputchar(x) fputc((x), stderr)
#define SUBREADfflush(x) fflush(x)
#define CORE_SOFT_BR_CHAR '\n'
#else
/* The caller supplies the terminating semicolon, as with the exit() variant. */
#define STANDALONE_exit(i) return (i)

int safeRprintf(char *fmt, ...);
void msgqu_printf(const char * fmt, ...);

#define SUBREADprintf  msgqu_printf
#define SUBREADputs(x) msgqu_printf("%s\n",(x))
#define SUBREADputchar(X) msgqu_printf("%c",(X))
/* Expands to nothing in this branch. */
#define SUBREADfflush(X)
#define CORE_SOFT_BR_CHAR '\n'

#endif
155 
/*
 * Quality thresholds.  Defining NONONO_DONOTDEF suppresses them and defines
 * an empty TEST_TARGET instead.
 */
#if defined(NONONO_DONOTDEF)

#define TEST_TARGET ""

#else

#define QUALITY_KILL                      198
#define QUALITY_KILL_SUBREAD              160
#define MAX_QUALITY_TO_CALL_JUNCTION      2195
#define MAX_QUALITY_TO_EXPLORER_JUNCTION  209

#endif
168 
#define SNP_CALLING_ONLY_HIGHQUAL 1

/* Text printed when a memory allocation fails. */
#define MESSAGE_OUT_OF_MEMORY "Out of memory. If you are using Rsubread in R, please save your working environment and restart R. \n"

/*
 * Print the out-of-memory message.  The argument is ignored.
 * NOTE(review): the expansion already ends in ';', so this macro cannot be
 * used as the body of an unbraced `if` that is followed by `else`.
 */
#define fatal_memory_size(a) SUBREADputs(MESSAGE_OUT_OF_MEMORY);
173 
174 //#define QUALITY_KILL	175
175 //#define QUALITY_KILL_SUBREAD	150
176 
177 
/* Scalar aliases used by the index and voting code. */
typedef long long     subread_read_number_t;
typedef unsigned int  gehash_key_t;
typedef unsigned int  gehash_data_t;
/* An earlier variant used float for quality scores. */
typedef int           gene_quality_score_t;
/* An earlier variant used unsigned char for vote counts. */
typedef short         gene_vote_number_t;
185 
186 
#define XOFFSET_TABLE_SIZE        250000

#define ANCHORS_NUMBER            259
#define MAX_ALIGNMENT_PER_ANCHOR  2

/* Capacity of the arrays in gene_best_record_t. */
#define BEXT_RESULT_LIMIT         16

/* Search direction selectors. */
#define SEARCH_BACK   0
#define SEARCH_FRONT  1
196 
/*
 * Dimensions of the vote table in gene_vote_t: GENE_VOTE_TABLE_SIZE rows of
 * GENE_VOTE_SPACE slots.  Uncomment the next line (or define the symbol on
 * the compiler command line) to select the large table.
 */
//#define LARGE_GENE_VOTE_TABLE
#if !defined(LARGE_GENE_VOTE_TABLE)
#define GENE_VOTE_SPACE       24
#define GENE_VOTE_TABLE_SIZE  30
#else
#warning "Using LARGE_GENE_VOTE_TABLE"
#define GENE_VOTE_SPACE       173
#define GENE_VOTE_TABLE_SIZE  331
#endif
206 
/* Annotation limits. */
#define MAX_ANNOTATION_EXONS  30000
#define MAX_EXONS_PER_GENE    400
#define MAX_EXON_CONNECTIONS  10

#define MAX_GENE_NAME_LEN     128
#define MAX_INDEL_TOLERANCE   7

/* Index version numbers. */
#define SUBINDEX_VER0  100
#define SUBINDEX_VER1  200
#define SUBINDEX_VER2  201

/* Index option identifiers. */
#define SUBREAD_INDEX_OPTION_INDEX_GAP      0x0101
#define SUBREAD_INDEX_OPTION_INDEX_PADDING  0x0102
221 
/* ASCII ESC (0x1B). */
#define CHAR_ESC 27

/*
 * Map a base letter to its 2-bit code: 'A' -> 0, 'G' -> 1, every other
 * character below 'G' (e.g. 'C') -> 2, every other character above 'G'
 * (e.g. 'T') -> 3.  The argument is evaluated more than once.
 */
#define base2int(c) ((c) >= 'G' ? ((c) == 'G' ? 1 : 3) : ((c) == 'A' ? 0 : 2))

/*
 * Inverse of base2int for codes 0..3.  The constant 0x54434741 holds the
 * bytes 'T','C','G','A' from most to least significant, so shifting by
 * 8*code and masking yields 'A','G','C','T' for 0,1,2,3.
 */
#define int2base(c) ((0x54434741 >> (8 * (c))) & 0xff)

/* Colour-space digit <-> integer. */
#define color2int(c) ((c) - '0')
#define int2color(c) ("0123"[(c)])

/*
 * Truncate `str` in place at its first '/' (despite the name, it is a forward
 * slash that is looked for).  Strings without '/' are left unchanged.
 * `str` is evaluated several times.
 */
#define remove_backslash(str) { \
	int rb_cursor_ = 0; \
	for (; (str)[rb_cursor_]; rb_cursor_++) { \
		if ((str)[rb_cursor_] == '/') { (str)[rb_cursor_] = '\0'; break; } \
	} \
}
239 
240 /*
241 #define get_base_error_prob64(a) ((a) < '@'-1?1:pow(10., -0.1*((a)-'@')))
242 #define get_base_error_prob33(a) ((a) < '!'-1?1:pow(10., -0.1*((a)-'!')))
243 
244 */
/* Allocation hook; currently a direct alias for malloc(). */
#define SUBREAD_malloc(a) malloc(a)

/* FASTQ quality-score encodings. */
#define FASTQ_PHRED64 0
#define FASTQ_PHRED33 1

#define IS_DEBUG 0
251 
252 
253 typedef struct {
254   char gene_name [MAX_GENE_NAME_LEN];
255   // The chromosome name is not stored in this data structure
256   // All coordinates are translated into the linear location in the entire referenced genome, usually 0 ~ 3.2G
257   unsigned int start_offset;
258   unsigned int end_offset;
259 
260   // All exons are marked with the linear location in the entire referenced genome, usually 0 ~ 3.2G
261   // This marks the end of the list: exon_ends [total_number_of_exons] = 0
262   // It shouldn't be equal to 0, should it be?
263 
264   unsigned int exon_starts [MAX_EXONS_PER_GENE];
265   unsigned int exon_ends [MAX_EXONS_PER_GENE];
266 } gene_t;
267 
268 
269 
270 struct gehash_bucket {
271 	int current_items;
272 	int space_size;
273 	union
274 	{
275 		short * new_item_keys;
276 		gehash_key_t * item_keys;
277 	};
278 	gehash_data_t * item_values;
279 };
280 
281 
/* Number of slots in gehash_t.malloc_ptr; much smaller on MinGW. */
#if !defined(__MINGW32__)
#define GEHASH_MEM_PTR_NO (64*1024)
#else
#define GEHASH_MEM_PTR_NO (64)
#endif

/* The gehash table itself. */
typedef struct {
	int version_number;
	unsigned long long current_items;
	int buckets_number;
	char is_small_table;
	struct gehash_bucket * buckets;
	int index_gap;
	int padding;
	/* Presumably the memory blocks owned by the table -- confirm. */
	char * malloc_ptr[GEHASH_MEM_PTR_NO];
	int free_item_only;
} gehash_t;
298 
299 
/* A block of base values of the reference genome. */
typedef struct {
	unsigned int memory_block_size;
	unsigned int start_base_offset;
	unsigned int start_point;
	unsigned int length;
	unsigned char * values;
	unsigned int values_bytes;	/* presumably the size of `values` in bytes -- confirm */
	void * appendix1;
	void * appendix2;
} gene_value_index_t;
310 
311 
312 typedef struct {
313 	gene_vote_number_t max_vote;
314 	gehash_data_t max_position;
315 	gene_quality_score_t max_quality;
316 	gene_vote_number_t max_indel_recorder[MAX_INDEL_TOLERANCE*3];
317 	gene_vote_number_t * max_tmp_indel_recorder;
318 	int max_mask;
319 	gene_vote_number_t noninformative_subreads;
320 
321 	unsigned short items[GENE_VOTE_TABLE_SIZE];
322 	unsigned int pos [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
323 	int masks [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
324 	int marked_shift_indel[GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
325 	gene_vote_number_t votes [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
326 	gene_quality_score_t quality [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
327 	gene_vote_number_t last_subread_cluster [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
328 	gene_vote_number_t indel_recorder [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE][MAX_INDEL_TOLERANCE*3];
329 	char current_indel_cursor[GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
330 	char toli[GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
331 
332 	#ifdef MAKE_FOR_EXON
333 	short coverage_start [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
334 	short coverage_end [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
335 	short max_coverage_start;
336 	short max_coverage_end;
337 	//#warning Switch "MAKE_FOR_EXON" is turned on. It may cost more time. Do not turn it on unless you want to detect junction reads.
338 	#endif
339 } gene_vote_t ;
340 
/* Position and length of one indel. */
typedef struct {
	unsigned int pos;
	char len;
} indel_record_t;
345 
/* Count / support pair for an indel. */
typedef struct {
	int count;
	int support;
} indel_result_t;
350 
351 typedef struct{
352 	unsigned char best_len;
353 	unsigned int offsets [BEXT_RESULT_LIMIT];
354 	unsigned char is_reverse [BEXT_RESULT_LIMIT];
355 } gene_best_record_t;
356 
357 
358 
359 typedef struct{
360 	int max_len;
361 	unsigned int * max_positions;
362 	unsigned char * is_counterpart;
363 	gene_vote_number_t * max_votes;
364 	gene_quality_score_t * max_quality;
365 	gene_quality_score_t * max_final_quality;
366 	short * masks;
367 	char * max_indel_recorder;
368 	char * span_coverage;
369 #ifdef REPORT_ALL_THE_BEST
370 	gene_best_record_t * best_records;
371 #endif
372 	char max_indel_tolerance;
373 	short indel_recorder_length;
374 
375 	unsigned char *repeated_regions;
376 
377 } gene_allvote_t;
378 
379 
380 typedef struct{
381 	int total_offsets;
382         char *read_names;
383         unsigned int *read_offsets;
384 	HashTable * read_name_to_index;
385 	int padding;
386 } gene_offset_t;
387 
388 
/* Number of reads held by one thread_input_buffer. */
#define EXON_BUFFER_SIZE 3000

/*
 * Fixed-capacity buffer of reads with separate write and read cursors.
 * Each slot holds a name (121 chars), the bases and the qualities
 * (1201 chars each), a length and an id.
 */
struct thread_input_buffer {
	char read_names[EXON_BUFFER_SIZE][121];
	char read[EXON_BUFFER_SIZE][1201];
	char quality[EXON_BUFFER_SIZE][1201];
	int rl[EXON_BUFFER_SIZE];	/* presumably the read lengths -- confirm */
	int write_pointer;
	int read_pointer;

	unsigned int read_id[EXON_BUFFER_SIZE];
};
402 
/* Parameters of the seekable gzip reader. */
#define SEEKGZ_CHAIN_BLOCKS_NO    15
#define SEEKGZ_ZLIB_WINDOW_SIZE   (32*1024)
#define PAIRER_GZIP_WINDOW_BITS   (-15)
#define PAIRER_DEFAULT_MEM_LEVEL  8

/* One decompressed block together with what is needed to resume inflation at its start. */
typedef struct {
	/* Start of the block in the compressed file: byte offset plus bit count. */
	unsigned long long block_start_in_file_offset;
	unsigned int block_start_in_file_bits;

	/* Copied from the rolling window before this block is decompressed. */
	char block_dict_window[SEEKGZ_ZLIB_WINDOW_SIZE];
	unsigned int block_dict_window_size;

	/* Decompressed text and the positions of its line breaks. */
	char * block_txt;
	unsigned int * linebreak_positions;
	int linebreaks;
	int block_txt_size;
} seekable_decompressed_block_t;
420 
421 typedef struct {
422 	FILE * gz_fp;
423 	z_stream stem;
424 	char * in_zipped_buffer;
425 	unsigned int in_zipped_buff_read_ptr;
426 
427 	unsigned int current_block_txt_read_ptr;
428 	int blocks_in_chain;
429 	int has_multi_thread_accessed;
430 	int block_chain_current_no;
431 	seekable_decompressed_block_t block_rolling_chain[SEEKGZ_CHAIN_BLOCKS_NO];
432 
433 	int internal_error;
434 	subread_lock_t write_lock;
435 
436 	unsigned int rolling_dict_window_used;
437 	char rolling_dict_window[SEEKGZ_ZLIB_WINDOW_SIZE];
438 
439 	unsigned long long next_block_file_offset; // for the next block after ALL blocks in the chain
440 	unsigned int next_block_file_bits;  // for the next block after ALL blocks in the chain
441 } seekable_zfile_t;
442 
443 typedef struct{
444 	char dict_window[SEEKGZ_ZLIB_WINDOW_SIZE];
445 	unsigned long long block_gzfile_offset;
446 	unsigned int block_gzfile_bits;
447 	unsigned int block_dict_window_size;
448 
449 	unsigned int in_block_text_offset;
450 } seekable_position_t;
451 
452 typedef struct {
453 	unsigned long long read_number;
454 	int lane_id;
455 
456 	union{
457 		seekable_position_t ** pos_of_bclgzs;
458 		unsigned long long * pos_of_bcls;
459 	};
460 	union{
461 		seekable_position_t * pos_of_filtergz;
462 		unsigned long long pos_of_filter;
463 	};
464 	int is_EOF;
465 } input_BLC_pos_t;
466 
467 typedef struct {
468 	unsigned long long read_number;
469 	int total_bases_in_each_cluster;
470 	int single_read_lengths[INPUT_BLC_MAX_READS+1];
471 	int single_read_is_index[INPUT_BLC_MAX_READS];
472 	int current_lane, bcl_is_gzipped, filter_is_gzipped;
473 	char bcl_format_string[MAX_FILE_NAME_LENGTH];
474 	char filter_format_string[MAX_FILE_NAME_LENGTH];
475 	union{
476 		seekable_zfile_t ** bcl_gzip_fps;
477 		FILE ** bcl_fps;
478 	};
479 	union{
480 		seekable_zfile_t *  filter_gzip_fp;
481 		FILE *  filter_fp;
482 	};
483 	subread_lock_t read_lock;
484 	int is_EOF;
485 } input_BLC_t;
486 
487 typedef struct{
488 	union{
489 		srInt_64 pos_file1, pos_file2, pos_file3;
490 		seekable_position_t zpos_file1;
491 	};
492 	seekable_position_t zpos_file2;
493 	seekable_position_t zpos_file3;
494 	int current_file_no;
495 	srInt_64 current_read_no;
496 } input_mFQ_pos_t;
497 
498 typedef struct {
499 	char filename[MAX_FILE_NAME_LENGTH+1];
500 
501 	int is_plain;
502 	FILE * plain_fp;
503 	seekable_zfile_t gz_fp;
504 	int is_first_chars;
505 	unsigned char first_chars[2];
506 } autozip_fp;
507 
508 
509 
510 typedef struct {
511 	char ** files1;
512 	char ** files2;
513 	char ** files3;
514 	int total_files;
515 	int current_file_no;
516 	int current_guessed_lane_no;
517 	srInt_64 current_read_no;
518 	autozip_fp autofp1;
519 	autozip_fp autofp2;
520 	autozip_fp autofp3;
521 } input_mFQ_t;
522 
523 
524 typedef struct
525 {
526         char chro_name[MAX_CHROMOSOME_NAME_LEN];
527         unsigned int chro_length;
528 } SamBam_Reference_Info;
529 
530 
531 typedef struct{
532 	int current_BAM_file_no;
533 	srInt_64 section_start_pos;
534 	int in_section_offset;
535 	srInt_64 current_read_no;
536 } input_scBAM_pos_t;
537 
538 typedef struct {
539 	FILE * os_file;
540 	char * BAM_file_names[MAX_SCRNA_FASTQ_FILES];
541 	char section_buff[66000];
542 	char align_buff[FC_LONG_READ_RECORD_HARDLIMIT];
543 	int current_BAM_file_no;
544 	int total_BAM_files;
545 	int in_section_offset;
546 	int section_bin_bytes;
547 	int chro_table_size;
548 	SamBam_Reference_Info * chro_table;
549 	srInt_64 section_start_pos;
550 	srInt_64 current_read_no;
551 	subread_lock_t read_lock;
552 } input_scBAM_t;
553 
554 typedef struct {
555 	int read_no_in_chunk;
556 	int reads_available_in_chunk; // -1 : EOF of all input reads : no next chunk available.
557 								  // This can be set to -1 only when calling cacheBCL_netx_chunk().
558 	int chunk_no;
559 	int chunk_start_lane;
560 	int chunk_end_lane;
561 	int reads_per_chunk;
562 	int last_chunk_in_cache;
563 	int total_bases_in_each_cluster;
564 	int single_read_lengths[INPUT_BLC_MAX_READS+1];
565 	int single_read_is_index[INPUT_BLC_MAX_READS];
566 	int current_lane, bcl_is_gzipped, filter_is_gzipped;
567 	int all_threads;
568 	char bcl_format_string[MAX_FILE_NAME_LENGTH];
569 	char filter_format_string[MAX_FILE_NAME_LENGTH];
570 	int bcl_no_is_used[MAX_READ_LENGTH];
571 	autozip_fp * bcl_gzip_fps;
572 	autozip_fp   filter_fp;
573 	subread_lock_t read_lock;
574 	char ** bcl_bin_cache;
575 	int  flt_bin_cache_size;
576 	char *  flt_bin_cache;
577 	char * lane_no_in_chunk;
578 	int is_EOF;
579 } cache_BCL_t;
580 
581 typedef struct{
582 	union{
583 		unsigned long long simple_file_position;
584 		seekable_position_t seekable_gzip_position;
585 		input_BLC_pos_t BCL_position;
586 		input_mFQ_pos_t mFQ_position;
587 		input_scBAM_pos_t scBAM_position;
588 	};
589 	char gzfa_last_name[MAX_READ_NAME_LEN];
590 } gene_inputfile_position_t;
591 
592 
/* A pair of positions used as a key, smaller one first (per the member names). */
typedef struct {
	unsigned int small_key;
	unsigned int big_key;
} paired_exon_key;
597 
/* Support count and flags recorded for one junction / fusion. */
typedef struct {
	unsigned int supporting_reads;
	char is_fusion;
	char big_pos_neg;
	char small_pos_neg;
} fusion_record;
604 
/*
 * Returns a time stamp as a double (presumably seconds with sub-second
 * resolution -- confirm against the definition).  Declared with a (void)
 * prototype so that calls with arguments are rejected at compile time.
 */
double miltime(void);
606 
607 typedef struct{
608 	char chromosome_name[MAX_CHROMOSOME_NAME_LEN];
609 	unsigned long known_length;
610 } chromosome_t;
611 
/* Temporary record describing one mapped segment of a read. */
typedef struct {
	unsigned char record_type;
	unsigned char mapping_quality;
	unsigned short read_pos;
	unsigned short read_len;
	unsigned short flags;
	unsigned int read_number;
	unsigned int pos;
	unsigned short mapped_segment_in_read;
	char strand;	/* 0 = positive, 1 = negative */
} base_block_temp_read_t;
623 
/* Temporary record for one VCF entry. */
typedef struct {
	unsigned char record_type;
	unsigned int pos;
	short type;
} VCF_temp_read_t;
629 
630 typedef struct {
631 	char filename [MAX_FILE_NAME_LENGTH * MAX_SCRNA_FASTQ_FILES * 3];
632 	int space_type ;
633 	int file_type ;
634 	void * input_fp;   // can be system (FILE * sam or fastq or fasta), (seekable_zfile_t *)
635 	char gzfa_last_name[MAX_READ_NAME_LEN];
636 	unsigned long long read_chunk_start;
637 	union{
638 		cache_BCL_t bcl_input;
639 		input_mFQ_t scRNA_fq_input;
640 		input_scBAM_t scBAM_input;
641 	};
642 } gene_input_t;
643 
644 
645 
/* One aligned section of a read, with up to 10 recorded indels. */
struct explorer_section_t {
	unsigned int start_pos;

	short read_pos_start;
	short read_pos_end;

	char is_neg_strand;
	short indels;
	short indel_pos;

	short all_indel_poses[10];
	short all_indels[10];
};
660 
661 struct explorer_record_t
662 {
663 	struct explorer_section_t cigar_record[6];
664 	short b_search_tail;
665 };
666 
/* Open `fname` with the given stdio mode string; returns a FILE pointer. */
FILE * f_subr_open(const char * fname, const char * mode);

/* Package-private random number generator: seed it, then draw values. */
void myrand_srand(unsigned long long seed);
/* Takes no arguments; the (void) prototype lets the compiler enforce that. */
int myrand_rand(void);
/*
 * Type-generic helpers.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects.  `abs` replaces the function-like use
 * of the <stdlib.h> abs() wherever this header is included.
 */
#define abs(a)    ( (a) >= 0 ? (a) : -(a) )
#define max(a,b)  ( (a) < (b) ? (b) : (a) )
#define min(a,b)  ( (a) > (b) ? (b) : (a) )
673 
674 
675 #endif
676