1 /*==============================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 */
25 
26 #include "debug.h"
27 
28 #include "factory-cmn.h"
29 #include "factory-evidence-dnbs.h"
30 
31 typedef struct CGEvidenceDnbs15 CGEvidenceDnbs15;
32 #define CGFILETYPE_IMPL CGEvidenceDnbs15
33 #include "file.h"
34 
35 #include <klib/printf.h>
36 #include <klib/rc.h>
37 
38 #include <sysalloc.h>
39 
40 #include <assert.h>
41 #include <stdlib.h>
42 #include <string.h>
43 
44 struct CGEvidenceDnbs15 {
45     CGFileType dad;
46     const CGLoaderFile* file;
47     uint64_t records;
48     /* headers */
49     CGFIELD15_ASSEMBLY_ID assembly_id;
50     CGFIELD15_CHROMOSOME chromosome;
51     CGFIELD15_GENERATED_AT generated_at;
52     CGFIELD15_GENERATED_BY generated_by;
53     CGFIELD15_SAMPLE sample;
54     CGFIELD15_SOFTWARE_VERSION software_version;
55 };
56 
CGEvidenceDnbs15_Release(const CGEvidenceDnbs15 * cself,uint64_t * records)57 void CGEvidenceDnbs15_Release(const CGEvidenceDnbs15* cself, uint64_t* records)
58 {
59     if( cself != NULL ) {
60         CGEvidenceDnbs15* self = (CGEvidenceDnbs15*)cself;
61         if( records != NULL ) {
62             *records = cself->records;
63         }
64         free(self);
65     }
66 }
67 
68 static
CGEvidenceDnbs15_Header(const CGEvidenceDnbs15 * cself,const char * buf,const size_t len)69 rc_t CC CGEvidenceDnbs15_Header(const CGEvidenceDnbs15* cself, const char* buf, const size_t len)
70 {
71     rc_t rc = 0;
72     size_t slen;
73     CGEvidenceDnbs15* self = (CGEvidenceDnbs15*)cself;
74 
75     if( strncmp("ASSEMBLY_ID\t", buf, slen = 12) == 0 ) {
76         rc = str2buf(&buf[slen], len - slen, self->assembly_id, sizeof(self->assembly_id));
77     } else if( strncmp("CHROMOSOME\t", buf, slen = 11) == 0 ) {
78         rc = str2buf(&buf[slen], len - slen, self->chromosome, sizeof(self->chromosome));
79     } else if( strncmp("GENERATED_AT\t", buf, slen = 13) == 0 ) {
80         rc = str2buf(&buf[slen], len - slen, self->generated_at, sizeof(self->generated_at));
81     } else if( strncmp("GENERATED_BY\t", buf, slen = 13) == 0 ) {
82         rc = str2buf(&buf[slen], len - slen, self->generated_by, sizeof(self->generated_by));
83     } else if( strncmp("SAMPLE\t", buf, slen = 7) == 0 ) {
84         rc = str2buf(&buf[slen], len - slen, self->sample, sizeof(self->sample));
85     } else if( strncmp("SOFTWARE_VERSION\t", buf, slen = 17) == 0 ) {
86         rc = str2buf(&buf[slen], len - slen, self->software_version, sizeof(self->software_version));
87     } else {
88         rc = RC(rcRuntime, rcFile, rcConstructing, rcName, rcUnrecognized);
89     }
90     return rc;
91 }
92 
CGEvidenceDnbs25_Header(const CGEvidenceDnbs15 * cself,const char * buf,const size_t len)93 static rc_t CC CGEvidenceDnbs25_Header(
94     const CGEvidenceDnbs15* cself, const char* buf, const size_t len)
95 {
96     rc_t rc = 0;
97     size_t slen;
98     CGEvidenceDnbs15* self = (CGEvidenceDnbs15*)cself;
99 
100     /* from SRA-2617 files */
101     if      (strncmp("APPROVAL\t", buf, slen = 9) == 0) {
102     }
103     else if (strncmp("TITLE\t", buf, slen = 6) == 0) {
104     }
105     else if (strncmp("ADDRESS\t", buf, slen = 8) == 0) {
106     }
107 
108     /* From Table 1: Header Metadata Present in all Data Files */
109     else if (strncmp("CUSTOMER_SAMPLE_ID\t", buf, slen = 19) == 0) {
110     }
111     else if (strncmp("SAMPLE_SOURCE\t", buf, slen = 14) == 0) {
112     }
113     else if (strncmp("REPORTED_GENDER\t", buf, slen = 16) == 0) {
114     }
115     else if (strncmp("CALLED_GENDER\t", buf, slen = 14) == 0) {
116     }
117     else if (strncmp("TUMOR_STATUS\t", buf, slen = 13) == 0) {
118     }
119     else if (strncmp("LIBRARY_TYPE\t", buf, slen = 13) == 0) {
120     }
121     else if (strncmp("LIBRARY_SOURCE\t", buf, slen = 13) == 0) {
122     }
123 
124     else if (strncmp("ASSEMBLY_ID\t", buf, slen = 12) == 0) {
125         rc = str2buf(&buf[slen], len - slen,
126             self->assembly_id, sizeof(self->assembly_id));
127     }
128     else if (strncmp("CHROMOSOME\t", buf, slen = 11) == 0) {
129         rc = str2buf(&buf[slen], len - slen,
130             self->chromosome, sizeof(self->chromosome));
131     }
132     else if (strncmp("GENERATED_AT\t", buf, slen = 13) == 0) {
133         rc = str2buf(&buf[slen], len - slen,
134             self->generated_at, sizeof(self->generated_at));
135     }
136     else if (strncmp("GENERATED_BY\t", buf, slen = 13) == 0) {
137         rc = str2buf(&buf[slen], len - slen,
138             self->generated_by, sizeof(self->generated_by));
139     }
140     else if (strncmp("SAMPLE\t", buf, slen = 7) == 0) {
141         rc = str2buf(&buf[slen], len - slen,
142             self->sample, sizeof(self->sample));
143     }
144     else if (strncmp("SOFTWARE_VERSION\t", buf, slen = 17) == 0) {
145         rc = str2buf(&buf[slen], len - slen,
146             self->software_version, sizeof(self->software_version));
147     }
148     else {
149         rc = RC(rcRuntime, rcFile, rcConstructing, rcName, rcUnrecognized);
150     }
151 
152     return rc;
153 }
154 
155 static
CGEvidenceDnbs15_GetAssemblyId(const CGEvidenceDnbs15 * cself,const CGFIELD_ASSEMBLY_ID_TYPE ** assembly_id)156 rc_t CGEvidenceDnbs15_GetAssemblyId(const CGEvidenceDnbs15* cself, const CGFIELD_ASSEMBLY_ID_TYPE** assembly_id)
157 {
158     if( cself->assembly_id[0] == '\0' ) {
159         return RC(rcRuntime, rcFile, rcReading, rcFormat, rcInvalid);
160     }
161     *assembly_id = cself->assembly_id;
162     return 0;
163 }
164 
165 static
CGEvidenceDnbs15_GetSample(const CGEvidenceDnbs15 * cself,const CGFIELD_SAMPLE_TYPE ** sample)166 rc_t CGEvidenceDnbs15_GetSample(const CGEvidenceDnbs15* cself, const CGFIELD_SAMPLE_TYPE** sample)
167 {
168     if( cself->sample[0] == '\0' ) {
169         return RC(rcRuntime, rcFile, rcReading, rcFormat, rcInvalid);
170     }
171     *sample = cself->sample;
172     return 0;
173 }
174 
175 static
CGEvidenceDnbs15_GetChromosome(const CGEvidenceDnbs15 * cself,const CGFIELD_CHROMOSOME_TYPE ** chromosome)176 rc_t CGEvidenceDnbs15_GetChromosome(const CGEvidenceDnbs15* cself, const CGFIELD_CHROMOSOME_TYPE** chromosome)
177 {
178     if( cself->chromosome[0] == '\0' ) {
179         return RC(rcRuntime, rcFile, rcReading, rcFormat, rcInvalid);
180     }
181     *chromosome = cself->chromosome;
182     return 0;
183 }
184 
CGEvidenceDnbs_Read(const CGEvidenceDnbs15 * cself,const char * interval_id,TEvidenceDnbsData * data,int score_allele_num)185 static rc_t CC CGEvidenceDnbs_Read(
186     const CGEvidenceDnbs15* cself, const char* interval_id,
187     TEvidenceDnbsData* data, int score_allele_num)
188 {
189     rc_t rc = 0;
190     TEvidenceDnbsData_dnb* m = NULL;
191     static TEvidenceDnbsData_dnb next_rec;
192     static char next_interval_id[32] = "";
193 
194     /* local copy of unused TEvidenceDnbsData_dnb struct elements */
195     char reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
196     INSDC_coord_zero mate_offset_in_reference;
197     char mate_reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
198     uint16_t score_allele[4] = {0, 0, 0, 0}; /* v1.5 has ScoreAllele[012]; v2.0 - [0123] */
199     char qual[CG_EVDNC_SPOT_LEN];
200 
201     strcpy(data->interval_id, interval_id);
202     data->qty = 0;
203     /* already read one rec for this interval_id */
204     if( next_interval_id[0] != '\0' ) {
205         if( strcmp(next_interval_id, interval_id) != 0 ) {
206             /* nothing todo since next interval id is different */
207             return rc;
208         }
209         m = &data->dnbs[data->qty++];
210         memmove(m, &next_rec, sizeof(next_rec));
211         DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
212                         "\t%i\tnot_used\t0\tnot_used\t%c\t0\t0\t0\t'%.*s'\t'--'\n",
213             data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
214             m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
215             m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
216             m->mapping_quality, m->read_len, m->read));
217     }
218     do {
219         int i = 0;
220         char tmp[2];
221         CG_LINE_START(cself->file, b, len, p);
222         if( b == NULL || len == 0 ) {
223             next_interval_id[0] = '\0';
224             break; /* EOF */
225         }
226         if( data->qty >= data->max_qty ) {
227             TEvidenceDnbsData_dnb* x;
228             data->max_qty += 100;
229             x = realloc(data->dnbs, sizeof(*(data->dnbs)) * data->max_qty);
230             if( x == NULL ) {
231                 rc = RC(rcRuntime, rcFile, rcReading, rcMemory, rcExhausted);
232                 break;
233             }
234             data->dnbs = x;
235         }
236         m = &data->dnbs[data->qty++];
237 
238         /*DEBUG_MSG(10, ("%2hu evidenceDnbs: '%.*s'\n", data->qty, len, b));*/
239         CG_LINE_NEXT_FIELD(b, len, p);
240         rc = str2buf(b, p - b, next_interval_id, sizeof(next_interval_id));
241         CG_LINE_NEXT_FIELD(b, len, p);
242         rc = str2buf(b, p - b, m->chr, sizeof(m->chr));
243         CG_LINE_NEXT_FIELD(b, len, p);
244         rc = str2buf(b, p - b, m->slide, sizeof(m->slide));
245         CG_LINE_NEXT_FIELD(b, len, p);
246         rc = str2buf(b, p - b, m->lane, sizeof(m->lane));
247         CG_LINE_NEXT_FIELD(b, len, p);
248         rc = str2u32(b, p - b, &m->file_num_in_lane);
249         CG_LINE_NEXT_FIELD(b, len, p);
250         rc = str2u64(b, p - b, &m->dnb_offset_in_lane_file);
251         CG_LINE_NEXT_FIELD(b, len, p);
252         rc = str2u16(b, p - b, &m->allele_index);
253         CG_LINE_NEXT_FIELD(b, len, p);
254         rc = str2buf(b, p - b, tmp, sizeof(tmp));
255         if( tmp[0] != 'L' && tmp[0] != 'R' ) {
256             rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
257         }
258         m->side = tmp[0];
259         CG_LINE_NEXT_FIELD(b, len, p);
260         rc = str2buf(b, p - b, tmp, sizeof(tmp));
261         if( tmp[0] != '+' && tmp[0] != '-' ) {
262             rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
263         }
264         m->strand = tmp[0];
265         CG_LINE_NEXT_FIELD(b, len, p);
266         rc = str2i32(b, p - b, &m->offset_in_allele);
267         CG_LINE_NEXT_FIELD(b, len, p);
268         rc = str2buf(b, p - b, m->allele_alignment, sizeof(m->allele_alignment));
269         m->allele_alignment_length = p - b;
270         CG_LINE_NEXT_FIELD(b, len, p);
271         rc = str2i32(b, p - b, &m->offset_in_reference);
272         CG_LINE_NEXT_FIELD(b, len, p);
273         rc = str2buf(b, p - b, reference_alignment, sizeof(reference_alignment));
274         CG_LINE_NEXT_FIELD(b, len, p);
275         rc = str2i32(b, p - b, &mate_offset_in_reference);
276         CG_LINE_NEXT_FIELD(b, len, p);
277         rc = str2buf(b, p - b, mate_reference_alignment, sizeof(mate_reference_alignment));
278         CG_LINE_NEXT_FIELD(b, len, p);
279         rc = str2buf(b, p - b, tmp, sizeof(tmp));
280         if( tmp[0] < 33 || tmp[0] > 126 ) {
281             rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
282         }
283         m->mapping_quality = tmp[0];
284         for (i = 0; i < score_allele_num; ++i) {
285             CG_LINE_NEXT_FIELD(b, len, p);
286             rc = str2u16(b, p - b, &score_allele[i]);
287 	    if(rc){
288 		score_allele[i] =0;
289 		rc =0;
290 	    }
291         }
292         CG_LINE_NEXT_FIELD(b, len, p);
293         m->read_len = p - b;
294         rc = str2buf(b, m->read_len, m->read, sizeof(m->read));
295         CG_LINE_LAST_FIELD(b, len, p);
296         if( m->read_len != p - b ) {
297             rc = RC(rcRuntime, rcFile, rcReading, rcData, rcInconsistent);
298         } else {
299             rc = str2buf(b, p - b, qual, sizeof(qual));
300         }
301         ((CGEvidenceDnbs15*)cself)->records++;
302         if( strcmp(next_interval_id, data->interval_id) != 0 ) {
303             if (score_allele_num == 3) {
304               DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
305                             "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
306                 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
307                 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
308                 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
309                 reference_alignment, mate_offset_in_reference, mate_reference_alignment,
310                 m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], m->read_len, m->read, qual));
311             }
312             else if (score_allele_num == 4) {
313               DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
314                             "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
315                 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
316                 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
317                 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
318                 reference_alignment, mate_offset_in_reference, mate_reference_alignment,
319                 m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], score_allele[3], m->read_len, m->read, qual));
320             }
321             else { assert(0); }
322         }
323         CG_LINE_END();
324         if( next_interval_id[0] == '\0' ) {
325             break;
326         }
327         if( strcmp(next_interval_id, data->interval_id) != 0 ) {
328             /* next record is from next interval, remeber it and stop */
329             memmove(&next_rec, m, sizeof(next_rec));
330             data->qty--;
331             break;
332         }
333     } while( rc == 0 );
334     return rc;
335 }
336 
CGEvidenceDnbs25_Read(const CGEvidenceDnbs15 * cself,const char * interval_id,TEvidenceDnbsData * data)337 static rc_t CC CGEvidenceDnbs25_Read(const CGEvidenceDnbs15 *cself,
338      const char *interval_id, TEvidenceDnbsData *data)
339 {
340     const int score_allele_num = 4;
341     rc_t rc = 0;
342     TEvidenceDnbsData_dnb* m = NULL;
343     static TEvidenceDnbsData_dnb next_rec;
344     static char next_interval_id[32] = "";
345 
346     /* local copy of unused TEvidenceDnbsData_dnb struct elements */
347     char reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
348     INSDC_coord_zero mate_offset_in_reference;
349     char mate_reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
350     uint16_t score_allele[4] = {0, 0, 0, 0}; /* v >= 2.0 has ScoreAllele[0-3] */
351     char qual[CG_EVDNC_SPOT_LEN];
352 
353     bool lfr = false;
354     assert(cself->file->cg_file);
355     lfr = cself->file->cg_file->libraryType == cg_eLibraryType_PureLFR;
356 
357     strcpy(data->interval_id, interval_id);
358     data->qty = 0;
359     /* already read one rec for this interval_id */
360     if( next_interval_id[0] != '\0' ) {
361         if( strcmp(next_interval_id, interval_id) != 0 ) {
362             /* nothing todo since next interval id is different */
363             return rc;
364         }
365         m = &data->dnbs[data->qty++];
366         memmove(m, &next_rec, sizeof(next_rec));
367         DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
368                         "\t%i\tnot_used\t0\tnot_used\t%c\t0\t0\t0\t'%.*s'\t'--'\n",
369             data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
370             m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
371             m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
372             m->mapping_quality, m->read_len, m->read));
373     }
374     do {
375         int i = 0;
376         char tmp[2];
377         CG_LINE_START(cself->file, b, len, p);
378         if( b == NULL || len == 0 ) {
379             next_interval_id[0] = '\0';
380             break; /* EOF */
381         }
382         if( data->qty >= data->max_qty ) {
383             TEvidenceDnbsData_dnb* x;
384             data->max_qty += 100;
385             x = realloc(data->dnbs, sizeof(*(data->dnbs)) * data->max_qty);
386             if( x == NULL ) {
387                 rc = RC(rcRuntime, rcFile, rcReading, rcMemory, rcExhausted);
388                 break;
389             }
390             data->dnbs = x;
391         }
392         m = &data->dnbs[data->qty++];
393 
394         /*DEBUG_MSG(10, ("%2hu evidenceDnbs: '%.*s'\n", data->qty, len, b));*/
395         CG_LINE_NEXT_FIELD(b, len, p);
396         rc = str2buf(b, p - b, next_interval_id, sizeof(next_interval_id));
397         CG_LINE_NEXT_FIELD(b, len, p);
398         rc = str2buf(b, p - b, m->chr, sizeof(m->chr));
399         CG_LINE_NEXT_FIELD(b, len, p);
400         rc = str2buf(b, p - b, m->slide, sizeof(m->slide));
401         CG_LINE_NEXT_FIELD(b, len, p);
402         rc = str2buf(b, p - b, m->lane, sizeof(m->lane));
403         CG_LINE_NEXT_FIELD(b, len, p);
404         rc = str2u32(b, p - b, &m->file_num_in_lane);
405         CG_LINE_NEXT_FIELD(b, len, p);
406         rc = str2u64(b, p - b, &m->dnb_offset_in_lane_file);
407         CG_LINE_NEXT_FIELD(b, len, p);
408         rc = str2u16(b, p - b, &m->allele_index);
409         CG_LINE_NEXT_FIELD(b, len, p);
410         rc = str2buf(b, p - b, tmp, sizeof(tmp));
411         if( tmp[0] != 'L' && tmp[0] != 'R' ) {
412             rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
413         }
414         m->side = tmp[0];
415         CG_LINE_NEXT_FIELD(b, len, p);
416         rc = str2buf(b, p - b, tmp, sizeof(tmp));
417         if( tmp[0] != '+' && tmp[0] != '-' ) {
418             rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
419         }
420         m->strand = tmp[0];
421         CG_LINE_NEXT_FIELD(b, len, p);
422         rc = str2i32(b, p - b, &m->offset_in_allele);
423         CG_LINE_NEXT_FIELD(b, len, p);
424         rc = str2buf(b, p - b, m->allele_alignment, sizeof(m->allele_alignment));
425         m->allele_alignment_length = p - b;
426         CG_LINE_NEXT_FIELD(b, len, p);
427         rc = str2i32(b, p - b, &m->offset_in_reference);
428         CG_LINE_NEXT_FIELD(b, len, p);
429         rc = str2buf(b, p - b, reference_alignment, sizeof(reference_alignment));
430         CG_LINE_NEXT_FIELD(b, len, p);
431         rc = str2i32(b, p - b, &mate_offset_in_reference);
432         CG_LINE_NEXT_FIELD(b, len, p);
433         rc = str2buf(b, p - b, mate_reference_alignment, sizeof(mate_reference_alignment));
434         CG_LINE_NEXT_FIELD(b, len, p);
435         rc = str2buf(b, p - b, tmp, sizeof(tmp));
436         if( tmp[0] < 33 || tmp[0] > 126 ) {
437             rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
438         }
439         m->mapping_quality = tmp[0];
440         for (i = 0; i < score_allele_num; ++i) {
441             CG_LINE_NEXT_FIELD(b, len, p);
442             rc = str2u16(b, p - b, &score_allele[i]);
443 	        if (rc != 0) {
444 		        score_allele[i] = 0;
445 		        rc = 0;
446 	        }
447         }
448         CG_LINE_NEXT_FIELD(b, len, p);
449         m->read_len = p - b;
450         rc = str2buf(b, m->read_len, m->read, sizeof(m->read));
451 
452         /* Scores */
453         if (lfr) {
454             CG_LINE_NEXT_FIELD(b, len, p);
455         }
456         else {
457             CG_LINE_LAST_FIELD(b, len, p);
458         }
459         if (m->read_len != p - b) {
460             rc = RC(rcRuntime, rcFile, rcReading, rcData, rcInconsistent);
461         }
462         else {
463             rc = str2buf(b, p - b, qual, sizeof(qual));
464         }
465 
466         if (lfr) {
467          /* Standard Sequencing Service Data File Formats (File format v2.5)
468             states that Scores is the last column in evidenceDnbs file
469             but in 2.5 submission mentioned in SRA-2617
470             there are additionally wellId and wellScore columns.
471             #LIBRARY_TYPE is Pure LFR for this submission. */
472 
473             /* wellId */
474             CG_LINE_NEXT_FIELD(b, len, p);
475             rc = str2u16(b, p - b, &m->wellId);
476             if (rc == 0 && (m->wellId < 0 || m->wellId > 384)) {
477                 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
478             }
479 
480             /* wellScore */
481             CG_LINE_LAST_FIELD(b, len, p);
482         }
483 
484         ((CGEvidenceDnbs15*)cself)->records++;
485         if( strcmp(next_interval_id, data->interval_id) != 0 ) {
486             if (score_allele_num == 3) {
487               DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
488                             "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
489                 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
490                 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
491                 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
492                 reference_alignment, mate_offset_in_reference, mate_reference_alignment,
493                 m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], m->read_len, m->read, qual));
494             }
495             else if (score_allele_num == 4) {
496               DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
497                             "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
498                 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
499                 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
500                 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
501                 reference_alignment, mate_offset_in_reference, mate_reference_alignment,
502                 m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], score_allele[3], m->read_len, m->read, qual));
503             }
504             else { assert(0); }
505         }
506         CG_LINE_END();
507         if( next_interval_id[0] == '\0' ) {
508             break;
509         }
510         if( strcmp(next_interval_id, data->interval_id) != 0 ) {
511             /* next record is from next interval, remeber it and stop */
512             memmove(&next_rec, m, sizeof(next_rec));
513             data->qty--;
514             break;
515         }
516     } while( rc == 0 );
517     return rc;
518 }
519 
520 static
CGEvidenceDnbs15_Read(const CGEvidenceDnbs15 * self,const char * interval_id,TEvidenceDnbsData * data)521 rc_t CC CGEvidenceDnbs15_Read(const CGEvidenceDnbs15* self, const char* interval_id, TEvidenceDnbsData* data)
522 {   return CGEvidenceDnbs_Read(self, interval_id, data, 3); }
523 
524 static
CGEvidenceDnbs20_Read(const CGEvidenceDnbs15 * self,const char * interval_id,TEvidenceDnbsData * data)525 rc_t CC CGEvidenceDnbs20_Read(const CGEvidenceDnbs15* self, const char* interval_id, TEvidenceDnbsData* data)
526 {   return CGEvidenceDnbs_Read(self, interval_id, data, 4); }
527 
528 static const CGFileType_vt CGEvidenceDnbs15_vt =
529 {
530     CGEvidenceDnbs15_Header,
531     NULL,
532     NULL,
533     NULL,
534     NULL,
535     CGEvidenceDnbs15_Read,
536     NULL, /* tag_lfr */
537     CGEvidenceDnbs15_GetAssemblyId,
538     NULL,
539     NULL,
540     NULL,
541     CGEvidenceDnbs15_GetSample,
542     CGEvidenceDnbs15_GetChromosome,
543     CGEvidenceDnbs15_Release
544 };
545 
546 static const CGFileType_vt CGEvidenceDnbs20_vt =
547 {
548     CGEvidenceDnbs15_Header,
549     NULL,
550     NULL,
551     NULL,
552     NULL,
553     CGEvidenceDnbs20_Read,
554     NULL, /* tag_lfr */
555     CGEvidenceDnbs15_GetAssemblyId,
556     NULL,
557     NULL,
558     NULL,
559     CGEvidenceDnbs15_GetSample,
560     CGEvidenceDnbs15_GetChromosome,
561     CGEvidenceDnbs15_Release
562 };
563 
564 static const CGFileType_vt CGEvidenceDnbs25_vt = {
565     CGEvidenceDnbs25_Header,
566     NULL,
567     NULL,
568     NULL,
569     NULL,
570     CGEvidenceDnbs25_Read,
571     NULL, /* tag_lfr */
572     CGEvidenceDnbs15_GetAssemblyId,
573     NULL,
574     NULL,
575     NULL,
576     CGEvidenceDnbs15_GetSample,
577     CGEvidenceDnbs15_GetChromosome,
578     CGEvidenceDnbs15_Release
579 };
580 
581 static
CGEvidenceDnbs_Make(const CGFileType ** cself,const CGLoaderFile * file,const CGFileType_vt * vt)582 rc_t CC CGEvidenceDnbs_Make(const CGFileType** cself, const CGLoaderFile* file,
583                               const CGFileType_vt* vt)
584 {
585     rc_t rc = 0;
586     CGEvidenceDnbs15* obj = NULL;
587 
588     assert(vt);
589 
590     if( cself == NULL || file == NULL ) {
591         rc = RC(rcRuntime, rcFile, rcConstructing, rcParam, rcNull);
592     } else {
593         *cself = NULL;
594         if( (obj = calloc(1, sizeof(*obj))) == NULL ) {
595             rc = RC(rcRuntime, rcFile, rcConstructing, rcMemory, rcExhausted);
596         } else {
597             obj->file = file;
598             obj->dad.type = cg_eFileType_EVIDENCE_DNBS;
599             obj->dad.vt = vt;
600         }
601     }
602     if( rc == 0 ) {
603         *cself = &obj->dad;
604     } else {
605         CGEvidenceDnbs15_Release(obj, NULL);
606     }
607     return rc;
608 }
609 
CGEvidenceDnbs15_Make(const CGFileType ** self,const CGLoaderFile * file)610 rc_t CC CGEvidenceDnbs15_Make(const CGFileType** self, const CGLoaderFile* file)
611 {   return CGEvidenceDnbs_Make(self, file, &CGEvidenceDnbs15_vt); }
612 
CGEvidenceDnbs13_Make(const CGFileType ** self,const CGLoaderFile * file)613 rc_t CC CGEvidenceDnbs13_Make(const CGFileType** self, const CGLoaderFile* file)
614 {   return CGEvidenceDnbs15_Make(self, file); }
615 
CGEvidenceDnbs20_Make(const CGFileType ** self,const CGLoaderFile * file)616 rc_t CC CGEvidenceDnbs20_Make(const CGFileType** self, const CGLoaderFile* file)
617 {   return CGEvidenceDnbs_Make(self, file, &CGEvidenceDnbs20_vt); }
618 
CGEvidenceDnbs22_Make(const CGFileType ** self,const CGLoaderFile * file)619 rc_t CC CGEvidenceDnbs22_Make(const CGFileType** self, const CGLoaderFile* file)
620 {   return CGEvidenceDnbs_Make(self, file, &CGEvidenceDnbs20_vt); }
621 
CGEvidenceDnbs25_Make(const CGFileType ** self,const CGLoaderFile * file)622 rc_t CC CGEvidenceDnbs25_Make(const CGFileType** self, const CGLoaderFile* file)
623 {
624     return CGEvidenceDnbs_Make(self, file, &CGEvidenceDnbs25_vt);
625 }
626