1 /* 2 * Copyright (c) 2003, 2005-2007, 2010, 2013 Genome Research Ltd. 3 * Author(s): James Bonfield 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above 12 * copyright notice, this list of conditions and the following 13 * disclaimer in the documentation and/or other materials provided 14 * with the distribution. 15 * 16 * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 * Institute nor the names of its contributors may be used to endorse 18 * or promote products derived from this software without specific 19 * prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Author(s): James Bonfield, Simon Dear, Rodger Staden 36 * 37 * Copyright (c) 1995, 1997-1998, 2000-2001 MEDICAL RESEARCH COUNCIL 38 * All rights reserved 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions are met: 42 * 43 * 1 Redistributions of source code must retain the above copyright notice, 44 * this list of conditions and the following disclaimer. 45 * 46 * 2 Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in 48 * the documentation and/or other materials provided with the 49 * distribution. 50 * 51 * 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF 52 * MOLECULAR BIOLOGY nor the names of its contributors may be used 53 * to endorse or promote products derived from this software without 54 * specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 57 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 60 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) Medical Research Council 1994. All rights reserved. 71 * 72 * Permission to use, copy, modify and distribute this software and its 73 * documentation for any purpose is hereby granted without fee, provided that 74 * this copyright and notice appears in all copies. 75 * 76 * This file was written by James Bonfield, Simon Dear, Rodger Staden, 77 * as part of the Staden Package at the MRC Laboratory of Molecular 78 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. 79 * 80 * MRC disclaims all warranties with regard to this software. 81 */ 82 83 #ifndef _Read_h_ 84 #define _Read_h_ 85 86 /* 87 * Title: Read 88 * 89 * File: Read.h 90 * Purpose: Read data type 91 * Last update: June 14 1994 92 */ 93 94 /* 95 * This module encodes the `Read' sequence data structure. 96 * 97 * A `Read' contains information about bases and traces which are laid 98 * out along a single dimension of points. The number of points in a 99 * paricular sequence is given by `getNPoints', and these are numbered 100 * 0..getNPoints-1. At each point there are four trace readings, one 101 * for each base. 102 * 103 * The number of bases is `getNBases' which are numbered 0..N-1. 104 * Bases are represented by `char's. Every base is located at a 105 * particular point. 106 * 107 * The behaviour of these routines is undefined if given NULLRead or 108 * an undefined sequence. 109 */ 110 111 #include "io_lib/os.h" 112 #include "io_lib/scf.h" 113 #include "io_lib/mFILE.h" 114 115 #ifdef __cplusplus 116 extern "C" { 117 #endif 118 119 /* 120 *----------------------------------------------------------------------------- 121 * Macros 122 *----------------------------------------------------------------------------- 123 */ 124 125 #define NULLRead ((Read *)NULL) 126 127 /* Trace file formats */ 128 #define TT_ERR -1 129 #define TT_UNK 0 130 #define TT_SCF 1 131 #define TT_ABI 2 132 #define TT_ALF 3 133 #define TT_PLN 4 134 #define TT_EXP 5 135 #define TT_ZTR 7 136 #define TT_ZTR1 8 137 #define TT_ZTR2 9 138 #define TT_ZTR3 10 139 #define TT_BIO 11 140 #define TT_SFF 12 141 #define TT_ANY TT_UNK 142 /* ANYTR is specifically any *trace* type and not EXP or PLN format */ 143 #define TT_ANYTR 13 144 145 #define READ_BASES (1<<0) 146 #define READ_SAMPLES (1<<1) 147 #define READ_COMMENTS (1<<2) 148 #define READ_ALL (READ_BASES | READ_SAMPLES | READ_COMMENTS) 149 150 /* 151 *----------------------------------------------------------------------------- 152 * Structures and typedefs 153 *----------------------------------------------------------------------------- 154 */ 155 156 typedef uint_2 TRACE; /* for trace heights */ 157 158 typedef struct 159 { 160 int format; /* Trace file format */ 161 char *trace_name; /* Trace file name */ 162 163 int NPoints; /* No. of points of data */ 164 int NBases; /* No. of bases */ 165 166 /* Traces */ 167 TRACE *traceA; /* Array of length `NPoints' */ 168 TRACE *traceC; /* Array of length `NPoints' */ 169 TRACE *traceG; /* Array of length `NPoints' */ 170 TRACE *traceT; /* Array of length `NPoints' */ 171 TRACE maxTraceVal; /* The maximal value in any trace */ 172 int baseline; /* The zero offset for TRACE values */ 173 174 /* Bases */ 175 char *base; /* Array of length `NBases' */ 176 uint_2 *basePos; /* Array of length `NBases' */ 177 178 /* Cutoffs */ 179 int leftCutoff; /* Number of unwanted bases */ 180 int rightCutoff; /* First unwanted base at right end */ 181 182 /* Miscellaneous Sequence Information */ 183 char *info; /* misc seq info, eg comments */ 184 185 /* Probability information */ 186 char *prob_A; /* Array of length 'NBases' */ 187 char *prob_C; /* Array of length 'NBases' */ 188 char *prob_G; /* Array of length 'NBases' */ 189 char *prob_T; /* Array of length 'NBases' */ 190 191 /* The original input format data, or NULL if inapplicable */ 192 int orig_trace_format; 193 void (*orig_trace_free)(void *ptr); 194 void *orig_trace; 195 196 char *ident; /* Seq id, NULL for unknown. Malloced data. 197 Owned and freed by io_lib. */ 198 199 /* Pyrosequencing "peaks" (more like spikes). NULL if not used */ 200 int nflows; /* Number of "flows" */ 201 char *flow_order; /* Bases flowed across */ 202 float *flow; /* Processed to be 1 base unit oriented */ 203 unsigned int*flow_raw; /* Unprocessed data */ 204 205 void *private_data; /* The 'private data' block and size from SCF, */ 206 int private_size; /* NULL & 0 if not present. */ 207 } Read; 208 209 210 /* 211 *----------------------------------------------------------------------------- 212 * Function prototypes 213 *----------------------------------------------------------------------------- 214 */ 215 216 217 /* ----- Main I/O routines ----- */ 218 219 /* 220 * Read a sequence from a file "fn" of format "format". If "format" is 0 221 * (TT_ANY), we automatically determine the correct format. 222 * 223 * Returns: 224 * Read * for success 225 * NULLRead for failure 226 */ 227 Read *read_reading(char *fn, int format); 228 Read *fread_reading(FILE *fp, char *fn, int format); 229 Read *mfread_reading(mFILE *fp, char *fn, int format); 230 231 232 /* 233 * Write a sequence to a file "fn" of format "format". If "format" is 0, 234 * we choose our favourite - SCF. 235 * 236 * Returns: 237 * 0 for success 238 * -1 for failure 239 */ 240 int write_reading(char *fn, Read *read, int format); 241 int fwrite_reading(FILE *fp, Read *read, int format); 242 int mfwrite_reading(mFILE *fp, Read *read, int format); 243 244 245 /* ----- Utility routines ----- */ 246 247 /* 248 * Allocate a new sequence, with the given sizes. 249 * Returns: 250 * "Read *" for success 251 * "NULLRead" for failure 252 */ 253 Read *read_allocate(int num_points, int num_bases); 254 255 256 /* 257 * Duplicates the read structure and optionally gives it a new filename. 258 * The following fields are not duplicated: 259 * 260 * int orig_trace_format; 261 * void (*orig_trace_free)(void *ptr); 262 * void *orig_trace; 263 * char *ident; 264 * 265 * Returns: 266 * "Read *" for success 267 * "NULLRead" for failure 268 */ 269 Read* read_dup( Read* src, const char* new_name ); 270 271 272 /* 273 * Free memory allocated to a sequence by read_allocate(). 274 */ 275 void read_deallocate(Read *read); 276 277 /* unix specific file deletion routine */ 278 279 int remove_file(char *fn); 280 281 Read *read_abi(char *fn); 282 Read *fread_abi(FILE *fp); 283 Read *mfread_abi(mFILE *fp); 284 int write_abi(char *fn, Read *read); 285 int fwrite_abi(FILE *fp, Read *read); 286 int mfwrite_abi(mFILE *fp, Read *read); 287 288 int write_alf(char *fn, Read *read); 289 int fwrite_alf(FILE *fp, Read *read); 290 int mfwrite_alf(mFILE *fp, Read *read); 291 Read *read_alf(char *fn); 292 Read *fread_alf(FILE *fp); 293 Read *mfread_alf(mFILE *fp); 294 295 int write_pln(char *fn, Read *read); 296 int fwrite_pln(FILE *fp, Read *read); 297 int mfwrite_pln(mFILE *fp, Read *read); 298 Read *read_pln(char *fn); 299 Read *fread_pln(FILE *fp); 300 Read *mfread_pln(mFILE *fp); 301 302 int read_sections(int sec); 303 304 #include "io_lib/translate.h" 305 #include "io_lib/compress.h" 306 307 #ifdef __cplusplus 308 } 309 #endif 310 311 #endif /* _Read_h_ */ 312