/*
 * Copyright (c) 2011-2013, 2018-2019 Genome Research Ltd.
 * Author(s): James Bonfield
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *    2. Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 *
 *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
 *       Institute nor the names of its contributors may be used to endorse
 *       or promote products derived from this software without specific
 *       prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GENOME RESEARCH
 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef FQZ_COMP_QUAL_H
#define FQZ_COMP_QUAL_H

/*
 * Standard headers are pulled in before the extern "C" region.
 * <stddef.h> supplies size_t, which the prototypes below rely on;
 * <stdint.h> alone is not guaranteed to declare it.
 */
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Bit flags, deliberately mirroring BAM ones */
#define FQZ_FREVERSE 16
#define FQZ_FREAD2  128

/* Current FQZ format version */
#define FQZ_VERS 5

/* Highest value accepted for the "strat" argument of fqz_compress */
#define FQZ_MAX_STRAT 3

/*
 * Minimal per-record information taken from a cram slice.
 *
 * To compress we need to know the junction from one quality string to
 * the next (len), whether it is first/second read and whether it is
 * reverse complemented (flags).
 */
typedef struct {
    int num_records;
    uint32_t *len;    // of size num_records
    uint32_t *flags;  // of size num_records
} fqz_slice;


// Global flags (bit values for fqz_gparams.gflags)
static const int GFLAG_MULTI_PARAM = 1;
static const int GFLAG_HAVE_STAB   = 2;
static const int GFLAG_DO_REV      = 4;

// Param flags (bit values for fqz_param.pflags)
// Add PFLAG_HAVE_DMAP and a dmap[] for delta incr?
static const int PFLAG_DO_DEDUP    = 2;
static const int PFLAG_DO_LEN      = 4;
static const int PFLAG_DO_SEL      = 8;
static const int PFLAG_HAVE_QMAP   = 16;
static const int PFLAG_HAVE_PTAB   = 32;
static const int PFLAG_HAVE_DTAB   = 64;
static const int PFLAG_HAVE_QTAB   = 128;

/*
 * FQZ parameters.  These may be simply passed in as NULL to fqz_compress
 * and it'll automatically choose, but if we wish to have complete control
 * then this (long) struct contains all the details.
 *
 * TODO: document all this!
 */

// A single parameter block
typedef struct {
    // Starting context value
    uint16_t context;

    // flags
    unsigned int pflags;
    unsigned int do_sel, do_dedup, store_qmap, fixed_len;
    unsigned char use_qtab, use_dtab, use_ptab;

    // context bits and locations
    unsigned int qbits, qloc;
    unsigned int pbits, ploc;
    unsigned int dbits, dloc;
    unsigned int sbits, sloc;

    // models
    int max_sym, nsym, max_sel;

    // tables / maps
    unsigned int qmap[256];
    unsigned int qtab[256];
    unsigned int ptab[1024];
    unsigned int dtab[256];

    // Not stored parameters, but computed as part of encoder
    // parameterisation.
    int qshift;
    int pshift;
    int dshift;
    int sshift;
    unsigned int qmask; // (1<<qbits)-1
    int do_r2, do_qa;
} fqz_param;

// The global params, which is a collection of parameter blocks plus
// a few pieces of meta-data.
typedef struct {
    int vers;               // Format version; Set to FQZ_VERS
    unsigned int gflags;    // global param flags
    int nparam;             // Number of fqz_param blocks
    int max_sel;            // Number of selector values
    unsigned int stab[256]; // Selector to parameter no. table

    int max_sym;            // max symbol value across all sub-params

    fqz_param *p;           // 1 or more parameter blocks
} fqz_gparams;


/** Compress a block of quality values.
 *
 * @param vers      The CRAM version number (<<8) plus fqz strategy (0-3)
 * @param s         Length and flag data CRAM per-record
 * @param in        Buffer of concatenated quality values (no separator)
 * @param in_size   Size of in buffer
 * @param out_size  Size of returned output
 * @param strat     FQZ compression strategy (0 to FQZ_MAX_STRAT)
 * @param gp        Optional fqzcomp parameters (may be NULL).
 *
 * NOTE(review): the description of vers still mentions the strategy even
 * though strat is a separate argument; confirm which one the
 * implementation honours.
 *
 * @return The compressed quality buffer on success,
 *         NULL on failure.
 *         (Ownership of the returned buffer is not stated in this header;
 *         presumably the caller releases it - confirm.)
 */
char *fqz_compress(int vers, fqz_slice *s, char *in, size_t in_size,
                   size_t *out_size, int strat, fqz_gparams *gp);

/** Decompress a block of quality values.
 *
 * @param in        Buffer of compressed quality values
 * @param in_size   Size of in buffer
 * @param out_size  Size of returned output
 * @param lengths   Optional array filled out with record lengths.
 *                  May be NULL.  If not, preallocate it to correct size.
 * @param nlengths  Presumably the number of entries available in lengths
 *                  (TODO confirm against the implementation).
 *
 * @return The uncompressed concatenated qualities on success,
 *         NULL on failure.
 *         (Ownership of the returned buffer is not stated in this header;
 *         presumably the caller releases it - confirm.)
 */
char *fqz_decompress(char *in, size_t in_size, size_t *out_size,
                     int *lengths, int nlengths);

/** A utility function to analyse a quality buffer to gather statistical
 *  information.  This is written into qhist and pm.  This function is only
 *  useful if you intend on passing your own fqz_gparams block to
 *  fqz_compress.
 *
 * @param s          Per-record length and flag data for the buffer
 * @param in         Quality buffer to analyse
 * @param in_size    Size of in buffer
 * @param pm         Parameter block receiving the gathered statistics
 * @param qhist      Array of 256 counters receiving the gathered statistics
 *                   (presumably a histogram indexed by quality value -
 *                   TODO confirm)
 * @param one_param  Not described by this header; see the implementation
 */
void fqz_qual_stats(fqz_slice *s,
                    unsigned char *in, size_t in_size,
                    fqz_param *pm,
                    uint32_t qhist[256],
                    int one_param);

#ifdef __cplusplus
}
#endif

#endif /* FQZ_COMP_QUAL_H */