1 /*=========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * =========================================================================== 24 * 25 */ 26 #ifndef _h_pl_regions_ 27 #define _h_pl_regions_ 28 29 #ifdef __cplusplus 30 extern "C" { 31 #endif 32 33 #include "pl-tools.h" 34 #include <klib/vector.h> 35 #include <klib/sort.h> 36 #include <klib/rc.h> 37 #include <insdc/sra.h> 38 39 40 #define RGN_COLUMN_COUNT 5 41 #define MIN_BIOLOGICAL_LEN 10 42 43 typedef struct region_type_mapping 44 { 45 int32_t rgn_type_adapter; /* technical */ 46 int32_t rgn_type_insert; /* biological */ 47 int32_t rgn_type_hq; /* HighQualityRegion */ 48 int32_t rgn_type_ga; /* GlobalAccuracy ??? */ 49 50 uint64_t count_of_unknown_rgn_types; 51 } region_type_mapping; 52 53 54 typedef struct region 55 { 56 int32_t spot_id; 57 int32_t type; 58 int32_t start; 59 int32_t end; 60 int32_t filter; 61 } region; 62 63 64 typedef struct regions_stat 65 { 66 uint32_t inserts; 67 uint32_t inserts_spots; 68 uint32_t expands_a; 69 uint32_t expands_i; 70 uint32_t expands_spots; 71 uint32_t end_gap; 72 uint32_t overlapps; 73 uint32_t removed; 74 } regions_stat; 75 76 77 typedef struct hq_region 78 { 79 uint32_t start; 80 uint32_t end; 81 } hq_region; 82 83 84 typedef struct regions 85 { 86 af_data hdf5_regions; 87 Vector read_Regions; 88 Vector sort_Regions; 89 Vector stock_Regions; 90 hq_region hq_rgn; 91 uint64_t offset; 92 uint32_t spot_id; 93 uint32_t spot_len; 94 uint32_t * data_32; 95 uint8_t * data_8; 96 size_t data_32_len; 97 size_t data_8_len; 98 99 regions_stat stat; 100 101 int32_t * complete_table; 102 int32_t * table_index; 103 } regions; 104 105 106 void rgn_init( regions *rgn ); 107 void rgn_free( regions *rgn ); 108 109 static const char def_label[] = "AdapterInsertLowQuality"; 110 static const size_t def_label_len = 23; 111 112 static const uint32_t label_adapter_start = 0; 113 static const uint32_t label_adapter_len = 7; 114 static const uint32_t label_insert_start = 7; 115 static const uint32_t label_insert_len = 6; 116 static const uint32_t label_lowquality_start = 13; 117 static const uint32_t label_lowquality_len = 10; 118 119 rc_t rgn_open( const KDirectory *hdf5_dir, regions *rgn ); 120 121 rc_t rgn_load( regions *rgn, const uint32_t spot_id, 122 region_type_mapping *mapping, const uint32_t spot_len ); 123 124 void rgn_set_filter_value_for_all( regions *rgn, const uint32_t filter_value ); 125 126 rc_t rgn_start_data( regions *rgn, uint32_t *count ); 127 rc_t rgn_len_data( regions *rgn, uint32_t *count ); 128 rc_t rgn_type_data( regions *rgn, uint32_t *count ); 129 rc_t rgn_filter_data( regions *rgn, uint32_t *count ); 130 rc_t rgn_label_start_data( regions *rgn, uint32_t *count ); 131 rc_t rgn_label_len_data( regions *rgn, uint32_t *count ); 132 133 rc_t rgn_extract_type_mappings( const KNamelist *rgn_names, region_type_mapping *mapping, bool check_completenes ); 134 rc_t rgn_show_type_mappings( region_type_mapping *mapping ); 135 136 #ifdef __cplusplus 137 } 138 #endif 139 140 #endif 141