1 #ifndef __SEEK_ZLIB_H_ 2 #define __SEEK_ZLIB_H_ 3 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <string.h> 7 #include <zlib.h> 8 #include "subread.h" 9 10 #define PARALLEL_GZIP_TXT_BUFFER_MARGIN (2* MAX_FC_READ_LENGTH + 500) 11 #define PARALLEL_GZIP_TXT_BUFFER_SIZE (1024*1024) 12 #define PARALLEL_GZIP_ZIPPED_BUFFER_SIZE (PARALLEL_GZIP_TXT_BUFFER_SIZE *9/8 ) 13 14 typedef struct{ 15 int thread_no; 16 int in_buffer_used; 17 int out_buffer_used; 18 unsigned int CRC32; 19 unsigned int zipped_CRC32; 20 int plain_length; 21 char in_buffer[PARALLEL_GZIP_TXT_BUFFER_SIZE]; 22 char out_buffer[PARALLEL_GZIP_ZIPPED_BUFFER_SIZE]; 23 z_stream zipper; 24 } parallel_gzip_writer_thread_t; 25 26 typedef struct{ 27 int threads; 28 srInt_64 plain_length; 29 unsigned int CRC32; 30 FILE * os_file; 31 parallel_gzip_writer_thread_t * thread_objs; 32 } parallel_gzip_writer_t; 33 34 void parallel_gzip_writer_init(parallel_gzip_writer_t * pzwtr, char * output_filename, int total_threads); 35 void parallel_gzip_writer_add_text(parallel_gzip_writer_t * pzwtr, char * text, int tlen, int thread_no); 36 // because we have to keep sync between three fastq files, the flush function has to be manually called three times at the same time point. 37 // otherwise R1, I2 and R2 files will have inconsistent read orders. 38 // the outer program has to check if any of the three in_buffers is full. 39 void parallel_gzip_zip_texts(parallel_gzip_writer_t * pzwtr, int thread_no, int for_eof_marker); 40 void parallel_gzip_writer_flush(parallel_gzip_writer_t * pzwtr, int thread_no); 41 void parallel_gzip_writer_close(parallel_gzip_writer_t * pzwtr); 42 int parallel_gzip_writer_add_read_fqs_scRNA(parallel_gzip_writer_t**outfps, char * bambin, int thread_no); 43 44 // returns 0 if OK; returns 1 if the file is not indexable; returns -1 if file doesn't exist. 45 int seekgz_open(const char * fname, seekable_zfile_t * fp, FILE * old_fp); 46 47 // returns length in bytes if OK (length includes the line break at the end); returns 0 if EOF 48 int seekgz_gets(seekable_zfile_t * fp, char * buf, int buf_size); 49 50 void seekgz_tell(seekable_zfile_t * fp, seekable_position_t * pos); 51 52 void seekgz_seek(seekable_zfile_t * fp, seekable_position_t * pos); 53 54 // Diff: seekgz_next_char returns EOF for EOF but seekgz_next_int8 returns -1 for EOF 55 int seekgz_next_char(seekable_zfile_t * fp); 56 int seekgz_next_int8(seekable_zfile_t * fp); 57 58 void seekgz_close(seekable_zfile_t * fp); 59 60 // returns length in bytes if OK (length includes the line break at the end); returns 0 if EOF 61 int autozip_gets(autozip_fp * fp, char * buf, int buf_size); 62 63 64 // return -1 for EOF 65 int autozip_getch(autozip_fp * fp); 66 67 void autozip_close(autozip_fp * fp); 68 69 // return -1 if error, return 0 if plain text, return 1 if gzipped 70 int autozip_open(const char * fname, autozip_fp * fp); 71 72 void autozip_rewind(autozip_fp * fp); 73 74 int seekgz_preload_buffer( seekable_zfile_t * fp , subread_lock_t * read_lock); 75 76 // returns length in bytes if OK (length includes the line break at the end); returns 0 if EOF 77 int seekgz_gets(seekable_zfile_t * fp, char * buff, int buff_len); 78 #endif 79