1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #ifndef _h_pl_regions_
27 #define _h_pl_regions_
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 #include "pl-tools.h"
34 #include <klib/vector.h>
35 #include <klib/sort.h>
36 #include <klib/rc.h>
37 #include <insdc/sra.h>
38 
39 
/* NOTE(review): presumably the number of columns per region row; it matches
 * the five int32_t members of struct region declared in this header, but the
 * macro's use is not visible here - confirm against the implementation. */
#define RGN_COLUMN_COUNT 5
/* NOTE(review): presumably the minimum length a region needs to be treated
 * as biological; the use is not visible in this header - confirm. */
#define MIN_BIOLOGICAL_LEN 10
42 
/* region_type_mapping
 *  one int32_t slot per known region kind, plus a counter for region
 *  types that were not recognized.
 *  NOTE(review): presumably filled by rgn_extract_type_mappings() - the
 *  implementation is not part of this header, confirm there.
 */
typedef struct region_type_mapping region_type_mapping;
struct region_type_mapping
{
    /* slots for the known region kinds */
    int32_t rgn_type_adapter;   /* technical */
    int32_t rgn_type_insert;    /* biological */
    int32_t rgn_type_hq;        /* HighQualityRegion */
    int32_t rgn_type_ga;        /* GlobalAccuracy ??? */

    /* running total of region types that matched none of the slots above
       ( NOTE(review): who increments it is not visible here ) */
    uint64_t count_of_unknown_rgn_types;
};
52 
53 
/* region
 *  a single region record made of five signed 32-bit values.
 *  NOTE(review): presumably one row of the regions table read from the
 *  HDF5 input - confirm against the loader code.
 */
typedef struct region region;
struct region
{
    int32_t spot_id;    /* spot the region belongs to */
    int32_t type;       /* region type value */
    int32_t start;      /* start of the region */
    int32_t end;        /* end of the region ( NOTE(review): inclusive or
                           exclusive is not visible here ) */
    int32_t filter;     /* filter value attached to the region */
};
62 
63 
/* regions_stat
 *  eight unsigned 32-bit counters kept alongside the region processing.
 *  NOTE(review): the exact event each counter tracks is decided by the
 *  implementation, which is not visible in this header; the per-field
 *  notes below are read off the member names only - confirm.
 */
typedef struct regions_stat regions_stat;
struct regions_stat
{
    uint32_t inserts;           /* presumably: inserted regions */
    uint32_t inserts_spots;     /* presumably: spots that got an insert */
    uint32_t expands_a;         /* presumably: expanded adapter regions */
    uint32_t expands_i;         /* presumably: expanded insert regions */
    uint32_t expands_spots;     /* presumably: spots with an expansion */
    uint32_t end_gap;           /* presumably: gaps found at a spot's end */
    uint32_t overlapps;         /* presumably: overlapping regions
                                   ( spelling is part of the interface ) */
    uint32_t removed;           /* presumably: regions removed */
};
75 
76 
/* hq_region
 *  start/end pair, both unsigned 32-bit.
 *  NOTE(review): "hq" presumably stands for the HighQualityRegion named in
 *  region_type_mapping; whether end is inclusive is not visible here.
 */
typedef struct hq_region hq_region;
struct hq_region
{
    uint32_t start;     /* first bound of the span */
    uint32_t end;       /* second bound of the span */
};
82 
83 
84 typedef struct regions
85 {
86     af_data hdf5_regions;
87     Vector read_Regions;
88     Vector sort_Regions;
89     Vector stock_Regions;
90     hq_region hq_rgn;
91     uint64_t offset;
92     uint32_t spot_id;
93     uint32_t spot_len;
94     uint32_t * data_32;
95     uint8_t * data_8;
96     size_t data_32_len;
97     size_t data_8_len;
98 
99     regions_stat stat;
100 
101     int32_t * complete_table;
102     int32_t * table_index;
103 } regions;
104 
105 
106 void rgn_init( regions *rgn );
107 void rgn_free( regions *rgn );
108 
/* def_label
 *  default label text: the three label names "Adapter", "Insert" and
 *  "LowQuality" concatenated without separators.
 *  The label_*_start / label_*_len pairs below are offsets and lengths
 *  into this string and have to be adjusted by hand if the text changes
 *  ( C does not allow a static initializer that reads another const
 *  object, so they cannot be chained from each other ).
 */
static const char def_label[] = "AdapterInsertLowQuality";

/* length of def_label without the terminating NUL; taken from the array
   itself so it cannot drift from the literal ( evaluates to 23 ) */
static const size_t def_label_len = sizeof( def_label ) - 1;

/* "Adapter"    : def_label[  0 ..  6 ] */
static const uint32_t label_adapter_start    = 0;
static const uint32_t label_adapter_len      = 7;
/* "Insert"     : def_label[  7 .. 12 ] */
static const uint32_t label_insert_start     = 7;
static const uint32_t label_insert_len       = 6;
/* "LowQuality" : def_label[ 13 .. 22 ] */
static const uint32_t label_lowquality_start = 13;
static const uint32_t label_lowquality_len   = 10;
118 
119 rc_t rgn_open( const KDirectory *hdf5_dir, regions *rgn );
120 
121 rc_t rgn_load( regions *rgn, const uint32_t spot_id,
122                region_type_mapping *mapping, const uint32_t spot_len );
123 
124 void rgn_set_filter_value_for_all( regions *rgn, const uint32_t filter_value );
125 
126 rc_t rgn_start_data( regions *rgn, uint32_t *count );
127 rc_t rgn_len_data( regions *rgn, uint32_t *count );
128 rc_t rgn_type_data( regions *rgn, uint32_t *count );
129 rc_t rgn_filter_data( regions *rgn, uint32_t *count );
130 rc_t rgn_label_start_data( regions *rgn, uint32_t *count );
131 rc_t rgn_label_len_data( regions *rgn, uint32_t *count );
132 
133 rc_t rgn_extract_type_mappings( const KNamelist *rgn_names, region_type_mapping *mapping, bool check_completenes );
134 rc_t rgn_show_type_mappings( region_type_mapping *mapping );
135 
136 #ifdef __cplusplus
137 }
138 #endif
139 
140 #endif
141