1 /*
2  * Copyright (c) 2005-2008 Genome Research Ltd.
3  * Author(s): James Bonfield
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *    1. Redistributions of source code must retain the above copyright notice,
9  *       this list of conditions and the following disclaimer.
10  *
11  *    2. Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *
16  *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17  *    Institute nor the names of its contributors may be used to endorse
18  *    or promote products derived from this software without specific
19  *    prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25  * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Author(s): James Bonfield
36  *
37  * Copyright (c) 2001 MEDICAL RESEARCH COUNCIL
38  * All rights reserved
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions are met:
42  *
43  *    1 Redistributions of source code must retain the above copyright notice,
44  *      this list of conditions and the following disclaimer.
45  *
46  *    2 Redistributions in binary form must reproduce the above copyright
47  *      notice, this list of conditions and the following disclaimer in
48  *      the documentation and/or other materials provided with the
49  *      distribution.
50  *
51  *    3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
52  *      MOLECULAR BIOLOGY nor the names of its contributors may be used
53  *      to endorse or promote products derived from this software without
54  *      specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
57  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
60  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 #ifndef _ZTR_H
70 #define _ZTR_H
71 
72 #include "io_lib/Read.h"
73 #include "io_lib/deflate_interlaced.h"
74 
75 #ifdef __cplusplus
76 extern "C" {
77 #endif
78 
79 /* The header: an 8-byte magic number followed by a major/minor version pair */
80 typedef struct {
81     unsigned char  magic[8];	  /* 0xae5a54520d0a1a0a (be), i.e. the 8 characters of ZTR_MAGIC */
82     unsigned char  version_major; /* ZTR_VERSION_MAJOR */
83     unsigned char  version_minor; /* ZTR_VERSION_MINOR */
84 } ztr_header_t;
85 
86 /* The ZTR magic numbers: values for the magic and version fields of ztr_header_t */
87 #define ZTR_MAGIC   		"\256ZTR\r\n\032\n" /* bytes ae 5a 54 52 0d 0a 1a 0a */
88 #define ZTR_VERSION_MAJOR	1
89 #define ZTR_VERSION_MINOR	2
90 
91 /*
92  * CHUNKS
93  *
94  * Chunks consist of a block length followed by the type, format and data.
95  */
96 
97 typedef struct {		/* One chunk; ztr_t.chunk is an array of these */
98     uint4 type;			/* chunk type (be); the ZTR_TYPE_* constants are such values */
99     uint4 mdlength;		/* length of meta data field (be) */
100     char *mdata;		/* meta data (mdlength gives its length) */
101     uint4 dlength;		/* length of data field (be) */
102     char *data;			/* a format byte and the data itself (dlength gives its length) */
103     int   ztr_owns;		/* boolean: true if we can free mdata and data */
104 } ztr_chunk_t;
105 
106 /* Format types (presumably the values of the format byte that starts ztr_chunk_t.data - confirm). 76 and 78 are not defined here. */
107 #define ZTR_FORM_RAW		0
108 #define ZTR_FORM_RLE		1
109 #define ZTR_FORM_ZLIB		2
110 #define ZTR_FORM_XRLE		3
111 #define ZTR_FORM_XRLE2		4
112 #define ZTR_FORM_DELTA1		64
113 #define ZTR_FORM_DELTA2		65
114 #define ZTR_FORM_DELTA4		66
115 #define ZTR_FORM_DDELTA1	67
116 #define ZTR_FORM_DDELTA2	68
117 #define ZTR_FORM_DDELTA4	69
118 #define ZTR_FORM_16TO8		70
119 #define ZTR_FORM_32TO8		71
120 #define ZTR_FORM_FOLLOW1	72
121 #define ZTR_FORM_CHEB445	73
122 #define ZTR_FORM_ICHEB		74
123 #define ZTR_FORM_LOG2		75
124 #define ZTR_FORM_STHUFF	        77 /* the format served by the huffman tables cached in ztr_t.hcodes */
125 #define ZTR_FORM_QSHIFT		79
126 #define ZTR_FORM_TSHIFT		80
127 
/*
 * Converts the first four characters of a C string to a big-endian
 * 4-byte unsigned int: str[0] ends up in the most significant byte and
 * str[3] in the least significant one.
 *
 * Each character is converted to unsigned char and widened to uint4
 * before it is shifted. Plain char may be signed, and left-shifting a
 * negative value (or shifting a set bit into the sign bit of an int) is
 * undefined behaviour, so the shifts must be done on an unsigned type.
 * The macro therefore evaluates to a uint4.
 *
 * 'str' is evaluated four times, so it must not have side effects, and
 * it must refer to at least four readable characters.
 */
#define ZTR_STR2BE(str) (((uint4)(unsigned char)(str)[0] << 24) + \
                         ((uint4)(unsigned char)(str)[1] << 16) + \
                         ((uint4)(unsigned char)(str)[2] <<  8) + \
                         ((uint4)(unsigned char)(str)[3] <<  0))
133 
/*
 * Converts a big-endian 4-byte int to a C string.
 *
 * Stores the four bytes of 'i', most significant first, in str[0..3],
 * writes a terminating '\0' to str[4] and evaluates to 'str'. The
 * destination must therefore have room for at least 5 characters.
 *
 * 'i' is converted to uint4 before shifting so that a negative signed
 * argument is never right-shifted. Both arguments are evaluated more
 * than once, so neither may have side effects.
 *
 * The definition deliberately does not end in a line continuation, so
 * the source line that follows it is not spliced into the macro.
 */
#define ZTR_BE2STR(i,str) (((str)[0]=((uint4)(i)>>24)&0xff),\
                           ((str)[1]=((uint4)(i)>>16)&0xff),\
                           ((str)[2]=((uint4)(i)>> 8)&0xff),\
                           ((str)[3]=((uint4)(i)>> 0)&0xff),\
                           (str)[4]='\0',(str))
140 
141 #define ZTR_TYPE_HEADER	0xae5a5452 /* M-. Z T R: the first four bytes of ZTR_MAGIC */
142 /* Chunk types: each value is the four ASCII characters after ZTR_TYPE_, first character in the top byte */
143 #define ZTR_TYPE_SAMP	0x53414d50
144 #define ZTR_TYPE_SMP4	0x534d5034
145 #define ZTR_TYPE_BASE	0x42415345
146 #define ZTR_TYPE_BPOS	0x42504f53
147 #define ZTR_TYPE_CNF4	0x434e4634
148 #define ZTR_TYPE_CNF1	0x434e4631
149 #define ZTR_TYPE_CSID	0x43534944
150 #define ZTR_TYPE_TEXT	0x54455854
151 #define ZTR_TYPE_CLIP	0x434c4950
152 #define ZTR_TYPE_COMM	0x434f4d4d
153 #define ZTR_TYPE_CR32	0x43523332
154 #define ZTR_TYPE_FLWO	0x464c574f
155 #define ZTR_TYPE_FLWC	0x464c5743
156 #define ZTR_TYPE_HUFF   0x48554646
157 #define ZTR_TYPE_REGN   0x5245474e
158 
159 /* A text segment consists of identifier and value; ztr_t.text_segments points to these */
160 typedef struct {
161     char *ident; /* Pointer to identifier */
162     char *value; /* Pointer to value; NOTE(review): who owns the pointed-to text is not visible here */
163 } ztr_text_t;
164 
165 typedef struct { /* A huffman code set plus an ownership flag; ztr_t.hcodes points to these */
166     int ztr_owns; /* true if ZTR is to free the data (presumably 'codes') later */
167     huffman_codeset_t *codes; /* the code set itself */
168 } ztr_hcode_t;
169 
170 /* The main ZTR structure, which holds the entire file contents */
171 typedef struct {
172     /* General bits to do with the ZTR file format */
173     ztr_header_t header;	/* File Header */
174     ztr_chunk_t *chunk;		/* Array of chunks */
175     int nchunks;		/* Number of chunks in the chunk array */
176
177     /* Specifics to do with the standard chunk types */
178     ztr_text_t *text_segments; /* identifier/value pairs */
179     int ntext_segments; /* presumably the number of text_segments entries - confirm */
180
181     /* 'Hint' for delta of SAMP and SMP4 */
182     int delta_level;
183
184     /* Cached huffman encoding/decoding tables for STHUFF format */
185     ztr_hcode_t *hcodes;
186     int nhcodes; /* presumably the number of hcodes entries - confirm */
187     int hcodes_checked; /* NOTE(review): meaning not visible in this header; see the implementation */
188 } ztr_t;
189 
190 int ztr_read_header(mFILE *fp, ztr_header_t *h); /* NOTE(review): return conventions for this API are not visible in this header */
191 ztr_chunk_t *ztr_read_chunk_hdr(mFILE *fp);
192 /* Whole-file read/write: each has a FILE* form and an mFILE* form (the mf-prefixed name) */
193 int fwrite_ztr(FILE *fp, ztr_t *ztr);
194 int mfwrite_ztr(mFILE *fp, ztr_t *ztr);
195 ztr_t *fread_ztr(FILE *fp);
196 ztr_t *mfread_ztr(mFILE *fp);
197 Read *ztr2read(ztr_t *ztr); /* takes a ztr_t, returns a Read */
198 ztr_t *read2ztr(Read *r); /* takes a Read, returns a ztr_t */
199 int compress_ztr(ztr_t *ztr, int level);
200 int uncompress_ztr(ztr_t *ztr);
201 ztr_t *new_ztr(void); /* presumably allocates a ztr_t to be released with delete_ztr() - confirm */
202 void delete_ztr(ztr_t *ztr);
203 ztr_chunk_t **ztr_find_chunks(ztr_t *ztr, uint4 type, int *nchunks_p); /* *nchunks_p presumably receives the match count - confirm */
204 void ztr_process_text(ztr_t *ztr);
205 int compress_chunk(ztr_t *ztr, ztr_chunk_t *chunk, int format,
206 		   int option, int option2); /* format: presumably one of ZTR_FORM_* - confirm */
207 int uncompress_chunk(ztr_t *ztr, ztr_chunk_t *chunk);
208 ztr_hcode_t *ztr_add_hcode(ztr_t *ztr, huffman_codeset_t *codes, int ztr_owns); /* ztr_owns: see ztr_hcode_t.ztr_owns */
209 int ztr_store_hcodes(ztr_t *ztr);
210 ztr_hcode_t *ztr_find_hcode(ztr_t *ztr, int code_set);
211 ztr_chunk_t *ztr_find_hcode_chunk(ztr_t *ztr, int code_set);
212 char *ztr_lookup_mdata_value(ztr_t *z, ztr_chunk_t *chunk, char *key);
213 ztr_chunk_t *ztr_new_chunk(ztr_t *ztr, uint4 type,
214 			   char *data,  uint4 dlength,
215 			   char *mdata, uint4 mdlength); /* NOTE(review): whether data/mdata are copied or adopted is not visible here */
216 ztr_chunk_t *ztr_add_text(ztr_t *z, ztr_chunk_t *ch,
217 			  const char *key, const char *value);
218 
219 #ifdef __cplusplus
220 }
221 #endif
222 
223 #endif /* _ZTR_H */
224