1 #ifndef VIENNA_RNA_PACKAGE_FILE_FORMATS_H 2 #define VIENNA_RNA_PACKAGE_FILE_FORMATS_H 3 4 #ifdef VRNA_WARN_DEPRECATED 5 # if defined(__clang__) 6 # define DEPRECATED(func, msg) func __attribute__ ((deprecated("", msg))) 7 # elif defined(__GNUC__) 8 # define DEPRECATED(func, msg) func __attribute__ ((deprecated(msg))) 9 # else 10 # define DEPRECATED(func, msg) func 11 # endif 12 #else 13 # define DEPRECATED(func, msg) func 14 #endif 15 16 /** 17 * @file ViennaRNA/io/file_formats.h 18 * @ingroup file_utils, file_formats 19 * @brief Read and write different file formats for RNA sequences, structures 20 */ 21 22 /** 23 * @addtogroup file_formats 24 * @{ 25 * @brief Functions to read/write different file formats for nucleic acid sequences and secondary structures 26 */ 27 28 #include <stdio.h> 29 30 #include <ViennaRNA/datastructures/basic.h> 31 32 /** 33 * @brief Print a secondary structure as helix list 34 * 35 * @param seq The RNA sequence 36 * @param db The structure in dot-bracket format 37 * @param energy Free energy of the structure in kcal/mol 38 * @param file The file handle used to print to (print defaults to 'stdout' if(file == NULL) ) 39 */ 40 void 41 vrna_file_helixlist(const char *seq, 42 const char *db, 43 float energy, 44 FILE *file); 45 46 47 /** 48 * @brief Print a secondary structure as connect table 49 * 50 * Connect table file format looks like this: 51 * @verbatim 52 * 300 ENERGY = 7.0 example 53 * 1 G 0 2 22 1 54 * 2 G 1 3 21 2 55 * @endverbatim 56 * where the headerline is followed by 6 columns with: 57 * 1. Base number: index n 58 * 2. Base (A, C, G, T, U, X) 59 * 3. Index n-1 (0 if first nucleotide) 60 * 4. Index n+1 (0 if last nucleotide) 61 * 5. Number of the base to which n is paired. No pairing is indicated by 0 (zero). 62 * 6. Natural numbering. 63 * 64 * @param seq The RNA sequence 65 * @param db The structure in dot-bracket format 66 * @param energy The free energy of the structure 67 * @param identifier An optional identifier for the sequence 68 * @param file The file handle used to print to (print defaults to 'stdout' if(file == NULL) ) 69 */ 70 void 71 vrna_file_connect(const char *seq, 72 const char *db, 73 float energy, 74 const char *identifier, 75 FILE *file); 76 77 78 /** 79 * @brief Print a secondary structure in bpseq format 80 * 81 * @param seq The RNA sequence 82 * @param db The structure in dot-bracket format 83 * @param file The file handle used to print to (print defaults to 'stdout' if(file == NULL) ) 84 */ 85 void 86 vrna_file_bpseq(const char *seq, 87 const char *db, 88 FILE *file); 89 90 91 #if VRNA_WITH_JSON_SUPPORT 92 93 /** 94 * @brief Print a secondary structure in jsonformat 95 * 96 * @param seq The RNA sequence 97 * @param db The structure in dot-bracket format 98 * @param energy The free energy 99 * @param identifier An identifier for the sequence 100 * @param file The file handle used to print to (print defaults to 'stdout' if(file == NULL) ) 101 */ 102 void 103 vrna_file_json(const char *seq, 104 const char *db, 105 double energy, 106 const char *identifier, 107 FILE *file); 108 109 110 #endif 111 112 /** 113 * @brief Tell a function that an input is assumed to span several lines 114 * 115 * If used as input-option a function might also be returning this state telling 116 * that it has read data from multiple lines. 117 * 118 * @see vrna_extract_record_rest_structure(), vrna_file_fasta_read_record() 119 * 120 */ 121 #define VRNA_OPTION_MULTILINE 32U 122 /** 123 * @brief parse multiline constraint 124 * @deprecated see vrna_extract_record_rest_structure() 125 */ 126 #define VRNA_CONSTRAINT_MULTILINE 32U 127 128 /** 129 * @brief Get a (fasta) data set from a file or stdin 130 * 131 * This function may be used to obtain complete datasets from a filehandle or stdin. 132 * A dataset is always defined to contain at least a sequence. If data starts with a 133 * fasta header, i.e. a line like 134 * @verbatim >some header info @endverbatim 135 * then vrna_file_fasta_read_record() will assume that the sequence that follows the header may span 136 * over several lines. To disable this behavior and to assign a single line to the argument 137 * 'sequence' one can pass #VRNA_INPUT_NO_SPAN in the 'options' argument. 138 * If no fasta header is read in the beginning of a data block, a sequence must not span over 139 * multiple lines!\n 140 * Unless the options #VRNA_INPUT_NOSKIP_COMMENTS or #VRNA_INPUT_NOSKIP_BLANK_LINES are passed, 141 * a sequence may be interrupted by lines starting with a comment character or empty lines.\n 142 * A sequence is regarded as completely read if it was either assumed to not span over multiple 143 * lines, a secondary structure or structure constraint follows the sequence on the next line, 144 * or a new header marks the beginning of a new sequence...\n 145 * All lines following the sequence (this includes comments) that do not initiate a new dataset 146 * according to the above definition are available through the line-array 'rest'. 147 * Here one can usually find the structure constraint or other information belonging to the 148 * current dataset. Filling of 'rest' may be prevented by passing #VRNA_INPUT_NO_REST to the 149 * options argument.\n 150 * 151 * @note This function will exit any program with an error message if no sequence could be read! 152 * @note This function is NOT threadsafe! It uses a global variable to store information about 153 * the next data block. 154 * 155 * The main purpose of this function is to be able to easily parse blocks of data 156 * in the header of a loop where all calculations for the appropriate data is done inside the 157 * loop. The loop may be then left on certain return values, e.g.: 158 * @code 159 char *id, *seq, **rest; 160 int i; 161 id = seq = NULL; 162 rest = NULL; 163 while(!(vrna_file_fasta_read_record(&id, &seq, &rest, NULL, 0) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){ 164 if(id) 165 printf("%s\n", id); 166 printf("%s\n", seq); 167 if(rest) 168 for(i=0;rest[i];i++){ 169 printf("%s\n", rest[i]); 170 free(rest[i]); 171 } 172 free(rest); 173 free(seq); 174 free(id); 175 } 176 * @endcode 177 * In the example above, the while loop will be terminated when vrna_file_fasta_read_record() returns 178 * either an error, EOF, or a user initiated quit request.\n 179 * As long as data is read from stdin (we are passing NULL as the file pointer), the id is 180 * printed if it is available for the current block of data. The sequence will be printed in 181 * any case and if some more lines belong to the current block of data each line will be printed 182 * as well. 183 * 184 * @note Do not forget to free the memory occupied by header, sequence and rest! 185 * 186 * @param header A pointer which will be set such that it points to the header of the record 187 * @param sequence A pointer which will be set such that it points to the sequence of the record 188 * @param rest A pointer which will be set such that it points to an array of lines which also belong to the record 189 * @param file A file handle to read from (if NULL, this function reads from stdin) 190 * @param options Some options which may be passed to alter the behavior of the function, use 0 for no options 191 * @return A flag with information about what the function actually did read 192 */ 193 unsigned int 194 vrna_file_fasta_read_record(char **header, 195 char **sequence, 196 char ***rest, 197 FILE *file, 198 unsigned int options); 199 200 201 /** @brief Extract a dot-bracket structure string from (multiline)character array 202 * 203 * This function extracts a dot-bracket structure string from the 'rest' array as 204 * returned by vrna_file_fasta_read_record() and returns it. All occurences of comments within the 205 * 'lines' array will be skipped as long as they do not break the structure string. 206 * If no structure could be read, this function returns NULL. 207 * 208 * @pre The argument 'lines' has to be a 2-dimensional character array as obtained 209 * by vrna_file_fasta_read_record() 210 * @see vrna_file_fasta_read_record() 211 * 212 * @param lines The (multiline) character array to be parsed 213 * @param length The assumed length of the dot-bracket string (passing a value < 1 results in no length limit) 214 * @param option Some options which may be passed to alter the behavior of the function, use 0 for no options 215 * @return The dot-bracket string read from lines or NULL 216 */ 217 char * 218 vrna_extract_record_rest_structure(const char **lines, 219 unsigned int length, 220 unsigned int option); 221 222 223 /** 224 * @brief Read data from a given SHAPE reactivity input file 225 * 226 * This function parses the informations from a given file and stores the result 227 * in the preallocated string sequence and the double array values. 228 * 229 * @param file_name Path to the constraints file 230 * @param length Length of the sequence (file entries exceeding this limit will cause an error) 231 * @param default_value Value for missing indices 232 * @param sequence Pointer to an array used for storing the sequence obtained from the SHAPE reactivity file 233 * @param values Pointer to an array used for storing the values obtained from the SHAPE reactivity file 234 */ 235 int 236 vrna_file_SHAPE_read(const char *file_name, 237 int length, 238 double default_value, 239 char *sequence, 240 double *values); 241 242 243 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY 244 245 /** 246 * @brief Extract a hard constraint encoded as pseudo dot-bracket string 247 * 248 * @deprecated Use vrna_extract_record_rest_structure() instead! 249 * @pre The argument 'lines' has to be a 2-dimensional character array as obtained 250 * by vrna_file_fasta_read_record() 251 * @see vrna_file_fasta_read_record(), #VRNA_CONSTRAINT_DB_PIPE, #VRNA_CONSTRAINT_DB_DOT, #VRNA_CONSTRAINT_DB_X 252 * #VRNA_CONSTRAINT_DB_ANG_BRACK, #VRNA_CONSTRAINT_DB_RND_BRACK 253 * 254 * @param cstruc A pointer to a character array that is used as pseudo dot-bracket 255 * output 256 * @param lines A 2-dimensional character array with the extension lines from the FASTA 257 * input 258 * @param option The option flags that define the behavior and recognition pattern of 259 * this function 260 */ 261 DEPRECATED(void vrna_extract_record_rest_constraint(char **cstruc, 262 const char **lines, 263 unsigned int option), 264 "This function is obsolete"); 265 266 /** @brief Extract a dot-bracket structure string from (multiline)character array 267 * 268 * @deprecated This function is deprecated! Use \fn vrna_extract_record_rest_structure() as a replacment. 269 */ 270 DEPRECATED(char *extract_record_rest_structure(const char **lines, 271 unsigned int length, 272 unsigned int option), 273 "Use vrna_extract_record_rest_structure() instead"); 274 275 /** 276 * @brief Get a data record from stdin 277 * 278 * @deprecated This function is deprecated! Use vrna_file_fasta_read_record() as a replacment. 279 * 280 */ 281 DEPRECATED(unsigned int read_record(char **header, 282 char **sequence, 283 char ***rest, 284 unsigned int options), 285 "Use vrna_file_fasta_read_record() instead"); 286 287 288 DEPRECATED(unsigned int get_multi_input_line(char **string, 289 unsigned int options), 290 "This function is obsolete"); 291 292 #endif 293 294 /** 295 * @} 296 */ 297 298 #endif 299