1 /****************************************************************\ 2 * * 3 * Library for manipulation of FASTA format databases * 4 * * 5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk * 6 * Copyright (C) 2000-2009. All Rights Reserved. * 7 * * 8 * This source code is distributed under the terms of the * 9 * GNU General Public License, version 3. See the file COPYING * 10 * or http://www.gnu.org/licenses/gpl.txt for details * 11 * * 12 * If you use this code, please keep this notice intact. * 13 * * 14 \****************************************************************/ 15 16 #ifndef INCLUDED_FASTADB_H 17 #define INCLUDED_FASTADB_H 18 19 #ifdef __cplusplus 20 extern "C" { 21 #endif /* __cplusplus */ 22 23 #include <stdio.h> 24 #include <glib.h> 25 26 #include "compoundfile.h" 27 #include "sequence.h" 28 #include "argument.h" 29 #include "sparsecache.h" 30 31 typedef struct { 32 gchar *suffix_filter; 33 } FastaDB_ArgumentSet; 34 35 FastaDB_ArgumentSet *FastaDB_ArgumentSet_create(Argument *arg); 36 37 typedef enum { 38 FastaDB_Mask_ID = (1<<1), 39 FastaDB_Mask_DEF = (1<<2), 40 FastaDB_Mask_SEQ = (1<<3), 41 FastaDB_Mask_LEN = (1<<4), 42 FastaDB_Mask_ALL = (~0) 43 } FastaDB_Mask; 44 45 typedef struct FastaDB { 46 guint ref_count; 47 Alphabet *alphabet; 48 CompoundFile *cf; 49 gchar *out_buffer; 50 guint out_buffer_pos; 51 guint out_buffer_alloc; 52 gint line_length; 53 } FastaDB; 54 /* line_length is used for fasta file random-access 55 * it is set to zero for irregular line lengths 56 * is should not be used until the entire file has been parsed. 57 */ 58 59 typedef struct { 60 guint ref_count; 61 FastaDB *source; 62 CompoundFile_Location *location; 63 Sequence *seq; 64 } FastaDB_Seq; 65 66 typedef gboolean (*FastaDB_TraverseFunc)(FastaDB_Seq *fdbs, 67 gpointer user_data); 68 /* Return TRUE to stop the traversal */ 69 70 FastaDB *FastaDB_open_list(GPtrArray *path_list, 71 Alphabet *alphabet); 72 FastaDB *FastaDB_open_list_with_limit(GPtrArray *path_list, 73 Alphabet *alphabet, gint chunk_id, gint chunk_total); 74 FastaDB *FastaDB_open(gchar *path, Alphabet *alphabet); 75 FastaDB *FastaDB_share(FastaDB *fdb); 76 FastaDB *FastaDB_dup(FastaDB *fdb); /* For use in a separate thread */ 77 void FastaDB_close(FastaDB *fdb); 78 void FastaDB_rewind(FastaDB *fdb); 79 gboolean FastaDB_is_finished(FastaDB *fdb); 80 void FastaDB_traverse(FastaDB *fdb, FastaDB_Mask mask, 81 FastaDB_TraverseFunc fdtf, gpointer user_data); 82 gsize FastaDB_memory_usage(FastaDB *fdb); 83 84 FastaDB_Seq *FastaDB_next(FastaDB *fdb, FastaDB_Mask mask); 85 CompoundFile_Pos FastaDB_find_next_start(FastaDB *fdb, 86 CompoundFile_Pos pos); 87 88 gboolean FastaDB_file_is_fasta(gchar *path); 89 /* Returns true if first non-whitespace character in file is '>' */ 90 91 typedef struct { 92 FastaDB *source; 93 CompoundFile_Location *location; 94 Sequence_Strand strand; 95 gint seq_offset; /* for random access */ 96 gint length; /* for random access */ 97 } FastaDB_Key; 98 99 FastaDB_Seq *FastaDB_fetch(FastaDB *fdb, FastaDB_Mask mask, 100 CompoundFile_Pos pos); 101 102 FastaDB_Key *FastaDB_Key_create(FastaDB *source, 103 CompoundFile_Location *location, 104 Sequence_Strand strand, 105 gint seq_offset, gint length); 106 FastaDB_Key *FastaDB_Seq_get_key(FastaDB_Seq *fdbs); 107 FastaDB_Seq *FastaDB_Key_get_seq(FastaDB_Key *fdbk, FastaDB_Mask mask); 108 void FastaDB_Key_destroy(FastaDB_Key *fdbk); 109 gchar *FastaDB_Key_get_def(FastaDB_Key *fdbk); 110 SparseCache *FastaDB_Key_get_SparseCache(FastaDB_Key *fdbk); 111 void FastaDB_SparseCache_compress(SparseCache_Page *page, gint len); 112 113 114 FastaDB_Seq **FastaDB_all(gchar *path, Alphabet *alphabet, 115 FastaDB_Mask mask, guint *total); 116 117 FastaDB_Seq *FastaDB_Seq_share(FastaDB_Seq *fdbs); 118 void FastaDB_Seq_destroy(FastaDB_Seq *fdbs); 119 FastaDB_Seq *FastaDB_Seq_revcomp(FastaDB_Seq *fdbs); 120 void FastaDB_Seq_all_destroy(FastaDB_Seq **fdbs); 121 122 gint FastaDB_Seq_print(FastaDB_Seq *fdbs, FILE *fp, 123 FastaDB_Mask mask); 124 gint FastaDB_Seq_all_print(FastaDB_Seq **fdbs, FILE *fp, 125 FastaDB_Mask mask); 126 127 FastaDB_Seq *FastaDB_get_single(gchar *path, Alphabet *alphabet); 128 Alphabet_Type FastaDB_guess_type(gchar *path); 129 130 #ifdef __cplusplus 131 } 132 #endif /* __cplusplus */ 133 134 #endif /* INCLUDED_FASTADB_H */ 135 136