1 #ifndef __SEEK_ZLIB_H_
2 #define __SEEK_ZLIB_H_
3 
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <zlib.h>
8 #include "subread.h"
9 
// Sizing constants for the buffers embedded in parallel_gzip_writer_thread_t.
// The margin is twice MAX_FC_READ_LENGTH plus 500. MAX_FC_READ_LENGTH is not defined in this header
// (presumably it comes from subread.h — confirm).
// NOTE(review): presumably this is headroom kept free in in_buffer so one more record always fits — usage is not visible here.
10 #define PARALLEL_GZIP_TXT_BUFFER_MARGIN (2* MAX_FC_READ_LENGTH + 500)
// Size in bytes of the in_buffer array: 1024*1024 = 1 MiB.
11 #define PARALLEL_GZIP_TXT_BUFFER_SIZE (1024*1024)
// Size in bytes of the out_buffer array: 9/8 of the text buffer size (integer arithmetic, 1179648 bytes).
12 #define PARALLEL_GZIP_ZIPPED_BUFFER_SIZE (PARALLEL_GZIP_TXT_BUFFER_SIZE *9/8 )
13 
// Per-thread state of the parallel gzip writer; parallel_gzip_writer_t refers to these objects through thread_objs.
// The two embedded arrays make one instance larger than 2 MiB (1048576 + 1179648 bytes plus the other fields).
// NOTE(review): given that size, instances are presumably heap-allocated by parallel_gzip_writer_init — confirm.
14 typedef struct{
15 	int thread_no;	// presumably the index of the thread that owns this object — confirm
16 	int in_buffer_used;	// presumably the number of bytes currently held in in_buffer — confirm
17 	int out_buffer_used;	// presumably the number of bytes currently held in out_buffer — confirm
18 	unsigned int CRC32;	// NOTE(review): presumably a CRC-32 over the plain text of the current chunk — confirm
19 	unsigned int zipped_CRC32;	// NOTE(review): how this differs from CRC32 is not visible in this header — confirm in the implementation
20 	int plain_length;	// presumably the uncompressed length of the current chunk — confirm
21 	char in_buffer[PARALLEL_GZIP_TXT_BUFFER_SIZE];	// fixed-size array of PARALLEL_GZIP_TXT_BUFFER_SIZE bytes (1 MiB); presumably holds text waiting to be compressed
22 	char out_buffer[PARALLEL_GZIP_ZIPPED_BUFFER_SIZE];	// fixed-size array of PARALLEL_GZIP_ZIPPED_BUFFER_SIZE bytes (9/8 of in_buffer); presumably holds compressed output
23 	z_stream zipper;	// zlib stream state (z_stream from <zlib.h>)
24 } parallel_gzip_writer_thread_t;
25 
// Writer-level state of the parallel gzip writer: one object per output file, referring to the per-thread objects.
26 typedef struct{
27 	int threads;	// presumably the number of per-thread objects reachable through thread_objs — confirm
28 	srInt_64 plain_length;	// srInt_64 is not declared in this header; presumably the total uncompressed length written so far — confirm
29 	unsigned int CRC32;	// NOTE(review): presumably the running CRC-32 of all plain text written so far — confirm
30 	FILE * os_file;	// stdio stream; presumably the output file opened by parallel_gzip_writer_init — confirm
31 	parallel_gzip_writer_thread_t * thread_objs;	// pointer to the per-thread objects; presumably an array of `threads` elements — confirm
32 } parallel_gzip_writer_t;
33 
// Interface of the parallel gzip writer. Only the prototypes are visible in this header, so every note marked
// "presumably" describes intent inferred from the names and must be confirmed against the implementation.
// NOTE(review): presumably opens output_filename for writing and sets up total_threads per-thread objects — confirm.
34 void parallel_gzip_writer_init(parallel_gzip_writer_t * pzwtr, char * output_filename, int total_threads);
// NOTE(review): presumably appends tlen bytes of text to the in_buffer of thread thread_no — confirm, including what happens when the buffer is full.
35 void parallel_gzip_writer_add_text(parallel_gzip_writer_t * pzwtr, char * text, int tlen, int thread_no);
36 // Because the three FASTQ files have to be kept in sync, the flush function has to be called manually three times at the same time point;
37 // otherwise the R1, I2 and R2 files will have inconsistent read orders.
38 // The calling program has to check whether any of the three in_buffers is full.
39 void parallel_gzip_zip_texts(parallel_gzip_writer_t * pzwtr, int thread_no, int for_eof_marker);
40 void parallel_gzip_writer_flush(parallel_gzip_writer_t * pzwtr, int thread_no);
// NOTE(review): presumably finalises the gzip output and closes os_file — confirm whether pzwtr itself is freed.
41 void parallel_gzip_writer_close(parallel_gzip_writer_t * pzwtr);
// outfps is a pointer to pointers to writers; presumably the R1/I2/R2 writers mentioned above — confirm.
// NOTE(review): the layout of bambin and the meaning of the int return value are not documented in this header.
42 int parallel_gzip_writer_add_read_fqs_scRNA(parallel_gzip_writer_t**outfps, char * bambin, int thread_no);
43 
44 // Returns 0 if OK; returns 1 if the file is not indexable; returns -1 if the file doesn't exist.
// seekable_zfile_t is not declared in this header (presumably it comes from subread.h — confirm).
// NOTE(review): the role of old_fp is not documented here — confirm in the implementation.
45 int seekgz_open(const char * fname, seekable_zfile_t * fp, FILE * old_fp);
46 
47 // Returns the length in bytes if OK (the length includes the line break at the end); returns 0 on EOF.
// NOTE(review): this function is declared a second time near the end of this header with different parameter names.
48 int seekgz_gets(seekable_zfile_t * fp, char * buf, int buf_size);
49 
// NOTE(review): presumably stores the current read position of fp into *pos for later use with seekgz_seek — confirm.
// seekable_position_t is not declared in this header.
50 void seekgz_tell(seekable_zfile_t * fp, seekable_position_t * pos);
51 
// NOTE(review): presumably repositions fp to a position previously obtained from seekgz_tell — confirm.
52 void seekgz_seek(seekable_zfile_t * fp, seekable_position_t * pos);
53 
54 // Difference between the two: seekgz_next_char returns EOF at end of file, whereas seekgz_next_int8 returns -1 at end of file.
// NOTE(review): EOF from <stdio.h> is a negative int and is commonly -1, so the two sentinels may coincide on a given platform;
// how the two functions differ for ordinary bytes is not visible in this header.
55 int seekgz_next_char(seekable_zfile_t * fp);
56 int seekgz_next_int8(seekable_zfile_t * fp);
57 
// NOTE(review): presumably releases the resources held by fp — confirm whether the underlying FILE is closed as well.
58 void seekgz_close(seekable_zfile_t * fp);
59 
60 // Returns the length in bytes if OK (the length includes the line break at the end); returns 0 on EOF.
// autozip_fp is not declared in this header (presumably it comes from subread.h — confirm).
61 int autozip_gets(autozip_fp * fp, char * buf, int buf_size);
62 
63 
64 // Returns -1 on EOF.
// NOTE(review): presumably returns the next byte of the stream otherwise — confirm.
65 int autozip_getch(autozip_fp * fp);
66 
// NOTE(review): presumably closes the file opened by autozip_open — confirm.
67 void autozip_close(autozip_fp * fp);
68 
69 // Returns -1 on error, 0 if the file is plain text, 1 if the file is gzipped.
70 int autozip_open(const char * fname, autozip_fp * fp);
71 
// NOTE(review): presumably repositions fp to the start of the file — confirm.
72 void autozip_rewind(autozip_fp * fp);
73 
// NOTE(review): the meaning of the int return value and how read_lock is used are not documented in this header;
// subread_lock_t is not declared here. Confirm both in the implementation.
74 int seekgz_preload_buffer( seekable_zfile_t * fp , subread_lock_t * read_lock);
75 
76 // Returns the length in bytes if OK (the length includes the line break at the end); returns 0 on EOF.
// NOTE(review): this repeats the seekgz_gets declaration made earlier in this header; only the parameter names differ
// (buff/buff_len here versus buf/buf_size there). The types match, so the repetition is harmless but redundant.
77 int seekgz_gets(seekable_zfile_t * fp, char * buff, int buff_len);
78 #endif
79