1 #ifndef VIENNA_RNA_PACKAGE_FILE_FORMATS_H
2 #define VIENNA_RNA_PACKAGE_FILE_FORMATS_H
3 
/*
 * DEPRECATED(func, msg) wraps the declaration 'func'.
 *
 * If VRNA_WARN_DEPRECATED is defined and the compiler identifies itself as
 * clang or as GCC-compatible, a 'deprecated' attribute carrying 'msg' is
 * appended to the declaration. In every other case the macro expands to the
 * plain declaration, so 'msg' is silently dropped.
 *
 * NOTE(review): the clang branch hands 'msg' over as the SECOND attribute
 * argument. According to the clang attribute reference that slot is the
 * Fix-It replacement text, not the warning message -- confirm this is
 * intended. The expansion is deliberately left untouched here, since it may
 * have to stay token-identical with definitions of DEPRECATED elsewhere.
 */
#ifdef VRNA_WARN_DEPRECATED
# if defined(__clang__)
#  define DEPRECATED(func, msg) func __attribute__ ((deprecated("", msg)))
# elif defined(__GNUC__)
#  define DEPRECATED(func, msg) func __attribute__ ((deprecated(msg)))
# else
#  define DEPRECATED(func, msg) func
# endif
#else
# define DEPRECATED(func, msg) func
#endif
15 
/**
 *  @file     ViennaRNA/io/file_formats.h
 *  @ingroup  file_utils, file_formats
 *  @brief    Read and write different file formats for RNA sequences and structures
 */

/**
 *  @addtogroup  file_formats
 *  @{
 *  @brief  Functions to read/write different file formats for nucleic acid sequences and secondary structures
 */
27 
28 #include <stdio.h>
29 
30 #include <ViennaRNA/datastructures/basic.h>
31 
/**
 *  @brief Print a secondary structure as helix list
 *
 *  @param  seq     The RNA sequence
 *  @param  db      The structure in dot-bracket format
 *  @param  energy  Free energy of the structure in kcal/mol
 *  @param  file    The file handle used to print to (output defaults to 'stdout' if @p file is NULL)
 */
void
vrna_file_helixlist(const char  *seq,
                    const char  *db,
                    float       energy,
                    FILE        *file);
45 
46 
/**
 *  @brief Print a secondary structure as connect table
 *
 *  Connect table file format looks like this:
 * @verbatim
 * 300  ENERGY = 7.0  example
 * 1 G       0    2   22    1
 * 2 G       1    3   21    2
 * @endverbatim
 *  where the header line is followed by 6 columns with:
 *  1. Base number: index n
 *  2. Base (A, C, G, T, U, X)
 *  3. Index n-1  (0 if first nucleotide)
 *  4. Index n+1  (0 if last nucleotide)
 *  5. Number of the base to which n is paired. No pairing is indicated by 0 (zero).
 *  6. Natural numbering.
 *
 *  @param  seq         The RNA sequence
 *  @param  db          The structure in dot-bracket format
 *  @param  energy      The free energy of the structure
 *  @param  identifier  An optional identifier for the sequence
 *  @param  file        The file handle used to print to (output defaults to 'stdout' if @p file is NULL)
 */
void
vrna_file_connect(const char  *seq,
                  const char  *db,
                  float       energy,
                  const char  *identifier,
                  FILE        *file);
76 
77 
/**
 *  @brief Print a secondary structure in bpseq format
 *
 *  @param  seq   The RNA sequence
 *  @param  db    The structure in dot-bracket format
 *  @param  file  The file handle used to print to (output defaults to 'stdout' if @p file is NULL)
 */
void
vrna_file_bpseq(const char  *seq,
                const char  *db,
                FILE        *file);
89 
90 
91 #if VRNA_WITH_JSON_SUPPORT
92 
/**
 *  @brief Print a secondary structure in JSON format
 *
 *  @note   This declaration sits inside an `#if VRNA_WITH_JSON_SUPPORT` section and is
 *          therefore only visible when that macro evaluates to a non-zero value.
 *
 *  @param  seq         The RNA sequence
 *  @param  db          The structure in dot-bracket format
 *  @param  energy      The free energy
 *  @param  identifier  An identifier for the sequence
 *  @param  file        The file handle used to print to (output defaults to 'stdout' if @p file is NULL)
 */
void
vrna_file_json(const char *seq,
               const char *db,
               double     energy,
               const char *identifier,
               FILE       *file);
108 
109 
110 #endif
111 
/**
 *  @brief  Tell a function that an input is assumed to span several lines
 *
 *  When passed as an input option, a function may also hand this flag back in
 *  its return value to signal that it has read data from multiple lines.
 *
 *  @see vrna_extract_record_rest_structure(), vrna_file_fasta_read_record()
 *
 */
#define VRNA_OPTION_MULTILINE             32U
/**
 *  @brief  Parse multiline constraint
 *
 *  Carries the same numeric value (32U) as #VRNA_OPTION_MULTILINE.
 *
 *  @deprecated See vrna_extract_record_rest_structure()
 */
#define VRNA_CONSTRAINT_MULTILINE         32U
127 
/**
 *  @brief  Get a (fasta) data set from a file or stdin
 *
 *  This function may be used to obtain complete datasets from a file handle or stdin.
 *  A dataset is always defined to contain at least a sequence. If data starts with a
 *  fasta header, i.e. a line like
 *  @verbatim >some header info @endverbatim
 *  then vrna_file_fasta_read_record() will assume that the sequence that follows the header may
 *  span several lines. To disable this behavior and to assign a single line to the argument
 *  'sequence' one can pass #VRNA_INPUT_NO_SPAN in the 'options' argument.
 *  If no fasta header is read at the beginning of a data block, a sequence must not span
 *  multiple lines!\n
 *  Unless the options #VRNA_INPUT_NOSKIP_COMMENTS or #VRNA_INPUT_NOSKIP_BLANK_LINES are passed,
 *  a sequence may be interrupted by lines starting with a comment character or empty lines.\n
 *  A sequence is regarded as completely read if it was either assumed to not span multiple
 *  lines, a secondary structure or structure constraint follows the sequence on the next line,
 *  or a new header marks the beginning of a new sequence...\n
 *  All lines following the sequence (this includes comments) that do not initiate a new dataset
 *  according to the above definition are available through the line-array 'rest'.
 *  Here one can usually find the structure constraint or other information belonging to the
 *  current dataset. Filling of 'rest' may be prevented by passing #VRNA_INPUT_NO_REST to the
 *  options argument.\n
 *
 *  @note This function will exit any program with an error message if no sequence could be read!
 *  @note This function is NOT threadsafe! It uses a global variable to store information about
 *  the next data block.
 *
 *  The main purpose of this function is to be able to easily parse blocks of data
 *  in the header of a loop where all calculations for the appropriate data are done inside the
 *  loop. The loop may then be left on certain return values, e.g.:
 *  @code
char *id, *seq, **rest;
int  i;
id = seq = NULL;
rest = NULL;
while(!(vrna_file_fasta_read_record(&id, &seq, &rest, NULL, 0) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){
  if(id)
    printf("%s\n", id);
  printf("%s\n", seq);
  if(rest)
    for(i=0;rest[i];i++){
      printf("%s\n", rest[i]);
      free(rest[i]);
    }
  free(rest);
  free(seq);
  free(id);
}
 *  @endcode
 *  In the example above, the while loop will be terminated when vrna_file_fasta_read_record() returns
 *  either an error, EOF, or a user initiated quit request.\n
 *  As long as data is read from stdin (we are passing NULL as the file pointer), the id is
 *  printed if it is available for the current block of data. The sequence will be printed in
 *  any case, and if some more lines belong to the current block of data each line will be printed
 *  as well.
 *
 *  @note Do not forget to free the memory occupied by header, sequence and rest!
 *
 *  @param  header    A pointer which will be set such that it points to the header of the record
 *  @param  sequence  A pointer which will be set such that it points to the sequence of the record
 *  @param  rest      A pointer which will be set such that it points to an array of lines which also belong to the record
 *  @param  file      A file handle to read from (if NULL, this function reads from stdin)
 *  @param  options   Some options which may be passed to alter the behavior of the function, use 0 for no options
 *  @return           A flag with information about what the function actually did read
 */
unsigned int
vrna_file_fasta_read_record(char          **header,
                            char          **sequence,
                            char          ***rest,
                            FILE          *file,
                            unsigned int  options);
199 
200 
/** @brief Extract a dot-bracket structure string from a (multiline) character array
 *
 * This function extracts a dot-bracket structure string from the 'rest' array as
 * returned by vrna_file_fasta_read_record() and returns it. All occurrences of comments within the
 * 'lines' array will be skipped as long as they do not break the structure string.
 * If no structure could be read, this function returns NULL.
 *
 * @pre      The argument 'lines' has to be a 2-dimensional character array as obtained
 *            by vrna_file_fasta_read_record()
 * @see vrna_file_fasta_read_record()
 *
 * @param lines   The (multiline) character array to be parsed
 * @param length  The assumed length of the dot-bracket string (passing a value < 1 results in no length limit)
 * @param option  Some options which may be passed to alter the behavior of the function, use 0 for no options
 * @return        The dot-bracket string read from lines or NULL
 */
char *
vrna_extract_record_rest_structure(const char   **lines,
                                   unsigned int length,
                                   unsigned int option);
221 
222 
/**
 * @brief Read data from a given SHAPE reactivity input file
 *
 * This function parses the information from a given file and stores the result
 * in the preallocated string @p sequence and the double array @p values.
 *
 * @param file_name     Path to the constraints file
 * @param length        Length of the sequence (file entries exceeding this limit will cause an error)
 * @param default_value Value for missing indices
 * @param sequence      Pointer to an array used for storing the sequence obtained from the SHAPE reactivity file
 * @param values        Pointer to an array used for storing the values obtained from the SHAPE reactivity file
 * @return              NOTE(review): the meaning of the returned int is not documented in this
 *                      header -- confirm against the implementation
 */
int
vrna_file_SHAPE_read(const char *file_name,
                     int        length,
                     double     default_value,
                     char       *sequence,
                     double     *values);
241 
242 
243 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
244 
245 /**
246  *  @brief  Extract a hard constraint encoded as pseudo dot-bracket string
247  *
248  *  @deprecated     Use vrna_extract_record_rest_structure() instead!
249  *  @pre      The argument 'lines' has to be a 2-dimensional character array as obtained
250  *            by vrna_file_fasta_read_record()
251  *  @see      vrna_file_fasta_read_record(), #VRNA_CONSTRAINT_DB_PIPE, #VRNA_CONSTRAINT_DB_DOT, #VRNA_CONSTRAINT_DB_X
252  *            #VRNA_CONSTRAINT_DB_ANG_BRACK, #VRNA_CONSTRAINT_DB_RND_BRACK
253  *
254  *  @param  cstruc  A pointer to a character array that is used as pseudo dot-bracket
255  *                  output
256  *  @param  lines   A 2-dimensional character array with the extension lines from the FASTA
257  *                  input
258  *  @param  option  The option flags that define the behavior and recognition pattern of
259  *                  this function
260  */
261 DEPRECATED(void vrna_extract_record_rest_constraint(char          **cstruc,
262                                                     const char    **lines,
263                                                     unsigned int  option),
264            "This function is obsolete");
265 
266 /** @brief Extract a dot-bracket structure string from (multiline)character array
267  *
268  * @deprecated This function is deprecated! Use \fn vrna_extract_record_rest_structure() as a replacment.
269  */
270 DEPRECATED(char *extract_record_rest_structure(const char   **lines,
271                                                unsigned int length,
272                                                unsigned int option),
273            "Use vrna_extract_record_rest_structure() instead");
274 
275 /**
276  *  @brief  Get a data record from stdin
277  *
278  *  @deprecated This function is deprecated! Use vrna_file_fasta_read_record() as a replacment.
279  *
280  */
281 DEPRECATED(unsigned int read_record(char          **header,
282                                     char          **sequence,
283                                     char          ***rest,
284                                     unsigned int  options),
285            "Use vrna_file_fasta_read_record() instead");
286 
287 
288 DEPRECATED(unsigned int get_multi_input_line(char         **string,
289                                              unsigned int options),
290            "This function is obsolete");
291 
292 #endif
293 
/**
 * @}
 */
297 
298 #endif
299