1 /****************************************************************\ 2 * * 3 * Library for manipulation of exonerate index files * 4 * * 5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk * 6 * Copyright (C) 2000-2009. All Rights Reserved. * 7 * * 8 * This source code is distributed under the terms of the * 9 * GNU General Public License, version 3. See the file COPYING * 10 * or http://www.gnu.org/licenses/gpl.txt for details * 11 * * 12 * If you use this code, please keep this notice intact. * 13 * * 14 \****************************************************************/ 15 16 #ifndef INCLUDED_INDEX_H 17 #define INCLUDED_INDEX_H 18 19 #ifdef __cplusplus 20 extern "C" { 21 #endif /* __cplusplus */ 22 23 #include <stdio.h> 24 #include <glib.h> 25 #include <sys/types.h> 26 #include <unistd.h> 27 28 #ifdef USE_PTHREADS 29 #include <pthread.h> 30 #endif /* USE_PTHREADS */ 31 32 #include "dataset.h" 33 #include "vfsm.h" 34 #include "hspset.h" 35 #include "bitarray.h" 36 37 /* File format: 38 Header 39 dataset path\n 40 FW Strand 41 RV Strand (if translated) 42 43 Strand: 44 Strand header 45 WordList: 46 For each word 47 word_id <MW> 48 freq_count <MI> 49 index_offset <TI> 50 Index: 51 sequence <NS> 52 pos <MS> (from dataset) 53 */ 54 55 typedef struct { 56 guint64 magic; 57 guint64 version; 58 guint64 type; /* plain | trans */ 59 guint64 dataset_path_len; 60 /**/ 61 guint64 word_length; 62 guint64 word_jump; 63 guint64 word_ambiguity; 64 guint64 saturate_threshold; 65 } Index_Header; 66 67 typedef struct { 68 gint max_word_width; /* From vfsm->lrw : MW */ 69 gint number_of_seqs_width; /* From dataset->header->number_of_seqs : NS */ 70 } Index_Width; 71 72 typedef struct { 73 gint sequence_id; 74 gint position; 75 } Index_Address; 76 77 typedef struct { 78 gint freq_count; 79 gint64 index_offset; 80 } Index_Word; 81 82 typedef struct { 83 guint64 max_index_length; /* Filled by Index_survey_word_list() */ 84 guint64 word_list_length; /* Filled by Index_survey_word_list() */ 85 guint64 total_index_length; /* Filled by Index_find_offsets() */ 86 } Index_Strand_Header; 87 88 typedef struct { 89 gint max_index_len_width; /* From max_index_length : MI */ 90 gint total_index_len_width; /* From total_index_length : TI */ 91 } Index_Strand_Width; 92 93 typedef struct { 94 Index_Strand_Header header; 95 Index_Strand_Width width; 96 /**/ 97 gint *word_table; /* VFSM array */ 98 Index_Word *word_list; 99 off_t strand_offset; /* Offset to strand header */ 100 BitArray *index_cache; 101 } Index_Strand; 102 103 typedef struct { 104 guint ref_count; 105 FILE *fp; 106 gchar *dataset_path; 107 Dataset *dataset; 108 Index_Header *header; 109 VFSM *vfsm; 110 Index_Width *width; 111 /**/ 112 Index_Strand *forward; 113 Index_Strand *revcomp; /* Only used when index is translated */ 114 #ifdef USE_PTHREADS 115 pthread_mutex_t index_mutex; 116 #endif /* USE_PTHREADS */ 117 } Index; 118 119 /**/ 120 121 Index *Index_create(Dataset *dataset, gboolean is_translated, gint word_length, 122 gint word_jump, gint word_ambiguity, gint saturate_threshold, 123 gchar *index_path, gchar *dataset_path, gint memory_limit); 124 Index *Index_share(Index *index); 125 void Index_destroy(Index *index); 126 void Index_info(Index *index); 127 Index *Index_open(gchar *path); 128 guint64 Index_memory_usage(Index *index); 129 void Index_preload_index(Index *index); 130 gboolean Index_check_filetype(gchar *path); 131 /* Returns TRUE when magic number is correct for this filetype */ 132 133 typedef struct { 134 HSPset *hsp_set; 135 gint target_id; 136 } Index_HSPset; 137 138 void Index_HSPset_destroy(Index_HSPset *index_hsp_set); 139 140 GPtrArray *Index_get_HSPsets(Index *index, HSP_Param *hsp_param, 141 Sequence *query, gboolean revcomp_target); 142 /* Returns a GPtrArray containing Index_HSPset structs */ 143 144 GPtrArray *Index_get_HSPsets_geneseed(Index *index, HSP_Param *hsp_param, 145 Sequence *query, gboolean revcomp_target, 146 gint geneseed_threshold, gint geneseed_repeat, 147 gint max_query_span, gint max_target_span); 148 149 /**/ 150 151 #ifdef __cplusplus 152 } 153 #endif /* __cplusplus */ 154 155 #endif /* INCLUDED_INDEX_H */ 156 157