1 /*==============================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 */
25
26 #include "debug.h"
27
28 #include "factory-cmn.h"
29 #include "factory-evidence-dnbs.h"
30
31 typedef struct CGEvidenceDnbs15 CGEvidenceDnbs15;
32 #define CGFILETYPE_IMPL CGEvidenceDnbs15
33 #include "file.h"
34
35 #include <klib/printf.h>
36 #include <klib/rc.h>
37
38 #include <sysalloc.h>
39
40 #include <assert.h>
41 #include <stdlib.h>
42 #include <string.h>
43
44 struct CGEvidenceDnbs15 {
45 CGFileType dad;
46 const CGLoaderFile* file;
47 uint64_t records;
48 /* headers */
49 CGFIELD15_ASSEMBLY_ID assembly_id;
50 CGFIELD15_CHROMOSOME chromosome;
51 CGFIELD15_GENERATED_AT generated_at;
52 CGFIELD15_GENERATED_BY generated_by;
53 CGFIELD15_SAMPLE sample;
54 CGFIELD15_SOFTWARE_VERSION software_version;
55 };
56
CGEvidenceDnbs15_Release(const CGEvidenceDnbs15 * cself,uint64_t * records)57 void CGEvidenceDnbs15_Release(const CGEvidenceDnbs15* cself, uint64_t* records)
58 {
59 if( cself != NULL ) {
60 CGEvidenceDnbs15* self = (CGEvidenceDnbs15*)cself;
61 if( records != NULL ) {
62 *records = cself->records;
63 }
64 free(self);
65 }
66 }
67
68 static
CGEvidenceDnbs15_Header(const CGEvidenceDnbs15 * cself,const char * buf,const size_t len)69 rc_t CC CGEvidenceDnbs15_Header(const CGEvidenceDnbs15* cself, const char* buf, const size_t len)
70 {
71 rc_t rc = 0;
72 size_t slen;
73 CGEvidenceDnbs15* self = (CGEvidenceDnbs15*)cself;
74
75 if( strncmp("ASSEMBLY_ID\t", buf, slen = 12) == 0 ) {
76 rc = str2buf(&buf[slen], len - slen, self->assembly_id, sizeof(self->assembly_id));
77 } else if( strncmp("CHROMOSOME\t", buf, slen = 11) == 0 ) {
78 rc = str2buf(&buf[slen], len - slen, self->chromosome, sizeof(self->chromosome));
79 } else if( strncmp("GENERATED_AT\t", buf, slen = 13) == 0 ) {
80 rc = str2buf(&buf[slen], len - slen, self->generated_at, sizeof(self->generated_at));
81 } else if( strncmp("GENERATED_BY\t", buf, slen = 13) == 0 ) {
82 rc = str2buf(&buf[slen], len - slen, self->generated_by, sizeof(self->generated_by));
83 } else if( strncmp("SAMPLE\t", buf, slen = 7) == 0 ) {
84 rc = str2buf(&buf[slen], len - slen, self->sample, sizeof(self->sample));
85 } else if( strncmp("SOFTWARE_VERSION\t", buf, slen = 17) == 0 ) {
86 rc = str2buf(&buf[slen], len - slen, self->software_version, sizeof(self->software_version));
87 } else {
88 rc = RC(rcRuntime, rcFile, rcConstructing, rcName, rcUnrecognized);
89 }
90 return rc;
91 }
92
CGEvidenceDnbs25_Header(const CGEvidenceDnbs15 * cself,const char * buf,const size_t len)93 static rc_t CC CGEvidenceDnbs25_Header(
94 const CGEvidenceDnbs15* cself, const char* buf, const size_t len)
95 {
96 rc_t rc = 0;
97 size_t slen;
98 CGEvidenceDnbs15* self = (CGEvidenceDnbs15*)cself;
99
100 /* from SRA-2617 files */
101 if (strncmp("APPROVAL\t", buf, slen = 9) == 0) {
102 }
103 else if (strncmp("TITLE\t", buf, slen = 6) == 0) {
104 }
105 else if (strncmp("ADDRESS\t", buf, slen = 8) == 0) {
106 }
107
108 /* From Table 1: Header Metadata Present in all Data Files */
109 else if (strncmp("CUSTOMER_SAMPLE_ID\t", buf, slen = 19) == 0) {
110 }
111 else if (strncmp("SAMPLE_SOURCE\t", buf, slen = 14) == 0) {
112 }
113 else if (strncmp("REPORTED_GENDER\t", buf, slen = 16) == 0) {
114 }
115 else if (strncmp("CALLED_GENDER\t", buf, slen = 14) == 0) {
116 }
117 else if (strncmp("TUMOR_STATUS\t", buf, slen = 13) == 0) {
118 }
119 else if (strncmp("LIBRARY_TYPE\t", buf, slen = 13) == 0) {
120 }
121 else if (strncmp("LIBRARY_SOURCE\t", buf, slen = 13) == 0) {
122 }
123
124 else if (strncmp("ASSEMBLY_ID\t", buf, slen = 12) == 0) {
125 rc = str2buf(&buf[slen], len - slen,
126 self->assembly_id, sizeof(self->assembly_id));
127 }
128 else if (strncmp("CHROMOSOME\t", buf, slen = 11) == 0) {
129 rc = str2buf(&buf[slen], len - slen,
130 self->chromosome, sizeof(self->chromosome));
131 }
132 else if (strncmp("GENERATED_AT\t", buf, slen = 13) == 0) {
133 rc = str2buf(&buf[slen], len - slen,
134 self->generated_at, sizeof(self->generated_at));
135 }
136 else if (strncmp("GENERATED_BY\t", buf, slen = 13) == 0) {
137 rc = str2buf(&buf[slen], len - slen,
138 self->generated_by, sizeof(self->generated_by));
139 }
140 else if (strncmp("SAMPLE\t", buf, slen = 7) == 0) {
141 rc = str2buf(&buf[slen], len - slen,
142 self->sample, sizeof(self->sample));
143 }
144 else if (strncmp("SOFTWARE_VERSION\t", buf, slen = 17) == 0) {
145 rc = str2buf(&buf[slen], len - slen,
146 self->software_version, sizeof(self->software_version));
147 }
148 else {
149 rc = RC(rcRuntime, rcFile, rcConstructing, rcName, rcUnrecognized);
150 }
151
152 return rc;
153 }
154
155 static
CGEvidenceDnbs15_GetAssemblyId(const CGEvidenceDnbs15 * cself,const CGFIELD_ASSEMBLY_ID_TYPE ** assembly_id)156 rc_t CGEvidenceDnbs15_GetAssemblyId(const CGEvidenceDnbs15* cself, const CGFIELD_ASSEMBLY_ID_TYPE** assembly_id)
157 {
158 if( cself->assembly_id[0] == '\0' ) {
159 return RC(rcRuntime, rcFile, rcReading, rcFormat, rcInvalid);
160 }
161 *assembly_id = cself->assembly_id;
162 return 0;
163 }
164
165 static
CGEvidenceDnbs15_GetSample(const CGEvidenceDnbs15 * cself,const CGFIELD_SAMPLE_TYPE ** sample)166 rc_t CGEvidenceDnbs15_GetSample(const CGEvidenceDnbs15* cself, const CGFIELD_SAMPLE_TYPE** sample)
167 {
168 if( cself->sample[0] == '\0' ) {
169 return RC(rcRuntime, rcFile, rcReading, rcFormat, rcInvalid);
170 }
171 *sample = cself->sample;
172 return 0;
173 }
174
175 static
CGEvidenceDnbs15_GetChromosome(const CGEvidenceDnbs15 * cself,const CGFIELD_CHROMOSOME_TYPE ** chromosome)176 rc_t CGEvidenceDnbs15_GetChromosome(const CGEvidenceDnbs15* cself, const CGFIELD_CHROMOSOME_TYPE** chromosome)
177 {
178 if( cself->chromosome[0] == '\0' ) {
179 return RC(rcRuntime, rcFile, rcReading, rcFormat, rcInvalid);
180 }
181 *chromosome = cself->chromosome;
182 return 0;
183 }
184
CGEvidenceDnbs_Read(const CGEvidenceDnbs15 * cself,const char * interval_id,TEvidenceDnbsData * data,int score_allele_num)185 static rc_t CC CGEvidenceDnbs_Read(
186 const CGEvidenceDnbs15* cself, const char* interval_id,
187 TEvidenceDnbsData* data, int score_allele_num)
188 {
189 rc_t rc = 0;
190 TEvidenceDnbsData_dnb* m = NULL;
191 static TEvidenceDnbsData_dnb next_rec;
192 static char next_interval_id[32] = "";
193
194 /* local copy of unused TEvidenceDnbsData_dnb struct elements */
195 char reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
196 INSDC_coord_zero mate_offset_in_reference;
197 char mate_reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
198 uint16_t score_allele[4] = {0, 0, 0, 0}; /* v1.5 has ScoreAllele[012]; v2.0 - [0123] */
199 char qual[CG_EVDNC_SPOT_LEN];
200
201 strcpy(data->interval_id, interval_id);
202 data->qty = 0;
203 /* already read one rec for this interval_id */
204 if( next_interval_id[0] != '\0' ) {
205 if( strcmp(next_interval_id, interval_id) != 0 ) {
206 /* nothing todo since next interval id is different */
207 return rc;
208 }
209 m = &data->dnbs[data->qty++];
210 memmove(m, &next_rec, sizeof(next_rec));
211 DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
212 "\t%i\tnot_used\t0\tnot_used\t%c\t0\t0\t0\t'%.*s'\t'--'\n",
213 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
214 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
215 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
216 m->mapping_quality, m->read_len, m->read));
217 }
218 do {
219 int i = 0;
220 char tmp[2];
221 CG_LINE_START(cself->file, b, len, p);
222 if( b == NULL || len == 0 ) {
223 next_interval_id[0] = '\0';
224 break; /* EOF */
225 }
226 if( data->qty >= data->max_qty ) {
227 TEvidenceDnbsData_dnb* x;
228 data->max_qty += 100;
229 x = realloc(data->dnbs, sizeof(*(data->dnbs)) * data->max_qty);
230 if( x == NULL ) {
231 rc = RC(rcRuntime, rcFile, rcReading, rcMemory, rcExhausted);
232 break;
233 }
234 data->dnbs = x;
235 }
236 m = &data->dnbs[data->qty++];
237
238 /*DEBUG_MSG(10, ("%2hu evidenceDnbs: '%.*s'\n", data->qty, len, b));*/
239 CG_LINE_NEXT_FIELD(b, len, p);
240 rc = str2buf(b, p - b, next_interval_id, sizeof(next_interval_id));
241 CG_LINE_NEXT_FIELD(b, len, p);
242 rc = str2buf(b, p - b, m->chr, sizeof(m->chr));
243 CG_LINE_NEXT_FIELD(b, len, p);
244 rc = str2buf(b, p - b, m->slide, sizeof(m->slide));
245 CG_LINE_NEXT_FIELD(b, len, p);
246 rc = str2buf(b, p - b, m->lane, sizeof(m->lane));
247 CG_LINE_NEXT_FIELD(b, len, p);
248 rc = str2u32(b, p - b, &m->file_num_in_lane);
249 CG_LINE_NEXT_FIELD(b, len, p);
250 rc = str2u64(b, p - b, &m->dnb_offset_in_lane_file);
251 CG_LINE_NEXT_FIELD(b, len, p);
252 rc = str2u16(b, p - b, &m->allele_index);
253 CG_LINE_NEXT_FIELD(b, len, p);
254 rc = str2buf(b, p - b, tmp, sizeof(tmp));
255 if( tmp[0] != 'L' && tmp[0] != 'R' ) {
256 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
257 }
258 m->side = tmp[0];
259 CG_LINE_NEXT_FIELD(b, len, p);
260 rc = str2buf(b, p - b, tmp, sizeof(tmp));
261 if( tmp[0] != '+' && tmp[0] != '-' ) {
262 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
263 }
264 m->strand = tmp[0];
265 CG_LINE_NEXT_FIELD(b, len, p);
266 rc = str2i32(b, p - b, &m->offset_in_allele);
267 CG_LINE_NEXT_FIELD(b, len, p);
268 rc = str2buf(b, p - b, m->allele_alignment, sizeof(m->allele_alignment));
269 m->allele_alignment_length = p - b;
270 CG_LINE_NEXT_FIELD(b, len, p);
271 rc = str2i32(b, p - b, &m->offset_in_reference);
272 CG_LINE_NEXT_FIELD(b, len, p);
273 rc = str2buf(b, p - b, reference_alignment, sizeof(reference_alignment));
274 CG_LINE_NEXT_FIELD(b, len, p);
275 rc = str2i32(b, p - b, &mate_offset_in_reference);
276 CG_LINE_NEXT_FIELD(b, len, p);
277 rc = str2buf(b, p - b, mate_reference_alignment, sizeof(mate_reference_alignment));
278 CG_LINE_NEXT_FIELD(b, len, p);
279 rc = str2buf(b, p - b, tmp, sizeof(tmp));
280 if( tmp[0] < 33 || tmp[0] > 126 ) {
281 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
282 }
283 m->mapping_quality = tmp[0];
284 for (i = 0; i < score_allele_num; ++i) {
285 CG_LINE_NEXT_FIELD(b, len, p);
286 rc = str2u16(b, p - b, &score_allele[i]);
287 if(rc){
288 score_allele[i] =0;
289 rc =0;
290 }
291 }
292 CG_LINE_NEXT_FIELD(b, len, p);
293 m->read_len = p - b;
294 rc = str2buf(b, m->read_len, m->read, sizeof(m->read));
295 CG_LINE_LAST_FIELD(b, len, p);
296 if( m->read_len != p - b ) {
297 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcInconsistent);
298 } else {
299 rc = str2buf(b, p - b, qual, sizeof(qual));
300 }
301 ((CGEvidenceDnbs15*)cself)->records++;
302 if( strcmp(next_interval_id, data->interval_id) != 0 ) {
303 if (score_allele_num == 3) {
304 DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
305 "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
306 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
307 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
308 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
309 reference_alignment, mate_offset_in_reference, mate_reference_alignment,
310 m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], m->read_len, m->read, qual));
311 }
312 else if (score_allele_num == 4) {
313 DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
314 "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
315 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
316 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
317 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
318 reference_alignment, mate_offset_in_reference, mate_reference_alignment,
319 m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], score_allele[3], m->read_len, m->read, qual));
320 }
321 else { assert(0); }
322 }
323 CG_LINE_END();
324 if( next_interval_id[0] == '\0' ) {
325 break;
326 }
327 if( strcmp(next_interval_id, data->interval_id) != 0 ) {
328 /* next record is from next interval, remeber it and stop */
329 memmove(&next_rec, m, sizeof(next_rec));
330 data->qty--;
331 break;
332 }
333 } while( rc == 0 );
334 return rc;
335 }
336
CGEvidenceDnbs25_Read(const CGEvidenceDnbs15 * cself,const char * interval_id,TEvidenceDnbsData * data)337 static rc_t CC CGEvidenceDnbs25_Read(const CGEvidenceDnbs15 *cself,
338 const char *interval_id, TEvidenceDnbsData *data)
339 {
340 const int score_allele_num = 4;
341 rc_t rc = 0;
342 TEvidenceDnbsData_dnb* m = NULL;
343 static TEvidenceDnbsData_dnb next_rec;
344 static char next_interval_id[32] = "";
345
346 /* local copy of unused TEvidenceDnbsData_dnb struct elements */
347 char reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
348 INSDC_coord_zero mate_offset_in_reference;
349 char mate_reference_alignment[CG_EVDNC_ALLELE_CIGAR_LEN];
350 uint16_t score_allele[4] = {0, 0, 0, 0}; /* v >= 2.0 has ScoreAllele[0-3] */
351 char qual[CG_EVDNC_SPOT_LEN];
352
353 bool lfr = false;
354 assert(cself->file->cg_file);
355 lfr = cself->file->cg_file->libraryType == cg_eLibraryType_PureLFR;
356
357 strcpy(data->interval_id, interval_id);
358 data->qty = 0;
359 /* already read one rec for this interval_id */
360 if( next_interval_id[0] != '\0' ) {
361 if( strcmp(next_interval_id, interval_id) != 0 ) {
362 /* nothing todo since next interval id is different */
363 return rc;
364 }
365 m = &data->dnbs[data->qty++];
366 memmove(m, &next_rec, sizeof(next_rec));
367 DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
368 "\t%i\tnot_used\t0\tnot_used\t%c\t0\t0\t0\t'%.*s'\t'--'\n",
369 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
370 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
371 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
372 m->mapping_quality, m->read_len, m->read));
373 }
374 do {
375 int i = 0;
376 char tmp[2];
377 CG_LINE_START(cself->file, b, len, p);
378 if( b == NULL || len == 0 ) {
379 next_interval_id[0] = '\0';
380 break; /* EOF */
381 }
382 if( data->qty >= data->max_qty ) {
383 TEvidenceDnbsData_dnb* x;
384 data->max_qty += 100;
385 x = realloc(data->dnbs, sizeof(*(data->dnbs)) * data->max_qty);
386 if( x == NULL ) {
387 rc = RC(rcRuntime, rcFile, rcReading, rcMemory, rcExhausted);
388 break;
389 }
390 data->dnbs = x;
391 }
392 m = &data->dnbs[data->qty++];
393
394 /*DEBUG_MSG(10, ("%2hu evidenceDnbs: '%.*s'\n", data->qty, len, b));*/
395 CG_LINE_NEXT_FIELD(b, len, p);
396 rc = str2buf(b, p - b, next_interval_id, sizeof(next_interval_id));
397 CG_LINE_NEXT_FIELD(b, len, p);
398 rc = str2buf(b, p - b, m->chr, sizeof(m->chr));
399 CG_LINE_NEXT_FIELD(b, len, p);
400 rc = str2buf(b, p - b, m->slide, sizeof(m->slide));
401 CG_LINE_NEXT_FIELD(b, len, p);
402 rc = str2buf(b, p - b, m->lane, sizeof(m->lane));
403 CG_LINE_NEXT_FIELD(b, len, p);
404 rc = str2u32(b, p - b, &m->file_num_in_lane);
405 CG_LINE_NEXT_FIELD(b, len, p);
406 rc = str2u64(b, p - b, &m->dnb_offset_in_lane_file);
407 CG_LINE_NEXT_FIELD(b, len, p);
408 rc = str2u16(b, p - b, &m->allele_index);
409 CG_LINE_NEXT_FIELD(b, len, p);
410 rc = str2buf(b, p - b, tmp, sizeof(tmp));
411 if( tmp[0] != 'L' && tmp[0] != 'R' ) {
412 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
413 }
414 m->side = tmp[0];
415 CG_LINE_NEXT_FIELD(b, len, p);
416 rc = str2buf(b, p - b, tmp, sizeof(tmp));
417 if( tmp[0] != '+' && tmp[0] != '-' ) {
418 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
419 }
420 m->strand = tmp[0];
421 CG_LINE_NEXT_FIELD(b, len, p);
422 rc = str2i32(b, p - b, &m->offset_in_allele);
423 CG_LINE_NEXT_FIELD(b, len, p);
424 rc = str2buf(b, p - b, m->allele_alignment, sizeof(m->allele_alignment));
425 m->allele_alignment_length = p - b;
426 CG_LINE_NEXT_FIELD(b, len, p);
427 rc = str2i32(b, p - b, &m->offset_in_reference);
428 CG_LINE_NEXT_FIELD(b, len, p);
429 rc = str2buf(b, p - b, reference_alignment, sizeof(reference_alignment));
430 CG_LINE_NEXT_FIELD(b, len, p);
431 rc = str2i32(b, p - b, &mate_offset_in_reference);
432 CG_LINE_NEXT_FIELD(b, len, p);
433 rc = str2buf(b, p - b, mate_reference_alignment, sizeof(mate_reference_alignment));
434 CG_LINE_NEXT_FIELD(b, len, p);
435 rc = str2buf(b, p - b, tmp, sizeof(tmp));
436 if( tmp[0] < 33 || tmp[0] > 126 ) {
437 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
438 }
439 m->mapping_quality = tmp[0];
440 for (i = 0; i < score_allele_num; ++i) {
441 CG_LINE_NEXT_FIELD(b, len, p);
442 rc = str2u16(b, p - b, &score_allele[i]);
443 if (rc != 0) {
444 score_allele[i] = 0;
445 rc = 0;
446 }
447 }
448 CG_LINE_NEXT_FIELD(b, len, p);
449 m->read_len = p - b;
450 rc = str2buf(b, m->read_len, m->read, sizeof(m->read));
451
452 /* Scores */
453 if (lfr) {
454 CG_LINE_NEXT_FIELD(b, len, p);
455 }
456 else {
457 CG_LINE_LAST_FIELD(b, len, p);
458 }
459 if (m->read_len != p - b) {
460 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcInconsistent);
461 }
462 else {
463 rc = str2buf(b, p - b, qual, sizeof(qual));
464 }
465
466 if (lfr) {
467 /* Standard Sequencing Service Data File Formats (File format v2.5)
468 states that Scores is the last column in evidenceDnbs file
469 but in 2.5 submission mentioned in SRA-2617
470 there are additionally wellId and wellScore columns.
471 #LIBRARY_TYPE is Pure LFR for this submission. */
472
473 /* wellId */
474 CG_LINE_NEXT_FIELD(b, len, p);
475 rc = str2u16(b, p - b, &m->wellId);
476 if (rc == 0 && (m->wellId < 0 || m->wellId > 384)) {
477 rc = RC(rcRuntime, rcFile, rcReading, rcData, rcOutofrange);
478 }
479
480 /* wellScore */
481 CG_LINE_LAST_FIELD(b, len, p);
482 }
483
484 ((CGEvidenceDnbs15*)cself)->records++;
485 if( strcmp(next_interval_id, data->interval_id) != 0 ) {
486 if (score_allele_num == 3) {
487 DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
488 "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
489 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
490 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
491 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
492 reference_alignment, mate_offset_in_reference, mate_reference_alignment,
493 m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], m->read_len, m->read, qual));
494 }
495 else if (score_allele_num == 4) {
496 DEBUG_MSG(10, ("%3u evidenceDnbs: '%s'\t'%s'\t'%s'\t'%s'\t%u\t%lu\t%hu\t%c\t%c\t%i\t'%.*s'"
497 "\t%i\t'%s'\t%i\t'%s'\t%c\t%hu\t%hu\t%hu\t%hu\t'%.*s'\t'%s'\n",
498 data->qty, next_interval_id, m->chr, m->slide, m->lane, m->file_num_in_lane,
499 m->dnb_offset_in_lane_file, m->allele_index, m->side, m->strand, m->offset_in_allele,
500 m->allele_alignment_length, m->allele_alignment, m->offset_in_reference,
501 reference_alignment, mate_offset_in_reference, mate_reference_alignment,
502 m->mapping_quality, score_allele[0], score_allele[1], score_allele[2], score_allele[3], m->read_len, m->read, qual));
503 }
504 else { assert(0); }
505 }
506 CG_LINE_END();
507 if( next_interval_id[0] == '\0' ) {
508 break;
509 }
510 if( strcmp(next_interval_id, data->interval_id) != 0 ) {
511 /* next record is from next interval, remeber it and stop */
512 memmove(&next_rec, m, sizeof(next_rec));
513 data->qty--;
514 break;
515 }
516 } while( rc == 0 );
517 return rc;
518 }
519
520 static
CGEvidenceDnbs15_Read(const CGEvidenceDnbs15 * self,const char * interval_id,TEvidenceDnbsData * data)521 rc_t CC CGEvidenceDnbs15_Read(const CGEvidenceDnbs15* self, const char* interval_id, TEvidenceDnbsData* data)
522 { return CGEvidenceDnbs_Read(self, interval_id, data, 3); }
523
524 static
CGEvidenceDnbs20_Read(const CGEvidenceDnbs15 * self,const char * interval_id,TEvidenceDnbsData * data)525 rc_t CC CGEvidenceDnbs20_Read(const CGEvidenceDnbs15* self, const char* interval_id, TEvidenceDnbsData* data)
526 { return CGEvidenceDnbs_Read(self, interval_id, data, 4); }
527
528 static const CGFileType_vt CGEvidenceDnbs15_vt =
529 {
530 CGEvidenceDnbs15_Header,
531 NULL,
532 NULL,
533 NULL,
534 NULL,
535 CGEvidenceDnbs15_Read,
536 NULL, /* tag_lfr */
537 CGEvidenceDnbs15_GetAssemblyId,
538 NULL,
539 NULL,
540 NULL,
541 CGEvidenceDnbs15_GetSample,
542 CGEvidenceDnbs15_GetChromosome,
543 CGEvidenceDnbs15_Release
544 };
545
546 static const CGFileType_vt CGEvidenceDnbs20_vt =
547 {
548 CGEvidenceDnbs15_Header,
549 NULL,
550 NULL,
551 NULL,
552 NULL,
553 CGEvidenceDnbs20_Read,
554 NULL, /* tag_lfr */
555 CGEvidenceDnbs15_GetAssemblyId,
556 NULL,
557 NULL,
558 NULL,
559 CGEvidenceDnbs15_GetSample,
560 CGEvidenceDnbs15_GetChromosome,
561 CGEvidenceDnbs15_Release
562 };
563
564 static const CGFileType_vt CGEvidenceDnbs25_vt = {
565 CGEvidenceDnbs25_Header,
566 NULL,
567 NULL,
568 NULL,
569 NULL,
570 CGEvidenceDnbs25_Read,
571 NULL, /* tag_lfr */
572 CGEvidenceDnbs15_GetAssemblyId,
573 NULL,
574 NULL,
575 NULL,
576 CGEvidenceDnbs15_GetSample,
577 CGEvidenceDnbs15_GetChromosome,
578 CGEvidenceDnbs15_Release
579 };
580
581 static
CGEvidenceDnbs_Make(const CGFileType ** cself,const CGLoaderFile * file,const CGFileType_vt * vt)582 rc_t CC CGEvidenceDnbs_Make(const CGFileType** cself, const CGLoaderFile* file,
583 const CGFileType_vt* vt)
584 {
585 rc_t rc = 0;
586 CGEvidenceDnbs15* obj = NULL;
587
588 assert(vt);
589
590 if( cself == NULL || file == NULL ) {
591 rc = RC(rcRuntime, rcFile, rcConstructing, rcParam, rcNull);
592 } else {
593 *cself = NULL;
594 if( (obj = calloc(1, sizeof(*obj))) == NULL ) {
595 rc = RC(rcRuntime, rcFile, rcConstructing, rcMemory, rcExhausted);
596 } else {
597 obj->file = file;
598 obj->dad.type = cg_eFileType_EVIDENCE_DNBS;
599 obj->dad.vt = vt;
600 }
601 }
602 if( rc == 0 ) {
603 *cself = &obj->dad;
604 } else {
605 CGEvidenceDnbs15_Release(obj, NULL);
606 }
607 return rc;
608 }
609
CGEvidenceDnbs15_Make(const CGFileType ** self,const CGLoaderFile * file)610 rc_t CC CGEvidenceDnbs15_Make(const CGFileType** self, const CGLoaderFile* file)
611 { return CGEvidenceDnbs_Make(self, file, &CGEvidenceDnbs15_vt); }
612
CGEvidenceDnbs13_Make(const CGFileType ** self,const CGLoaderFile * file)613 rc_t CC CGEvidenceDnbs13_Make(const CGFileType** self, const CGLoaderFile* file)
614 { return CGEvidenceDnbs15_Make(self, file); }
615
CGEvidenceDnbs20_Make(const CGFileType ** self,const CGLoaderFile * file)616 rc_t CC CGEvidenceDnbs20_Make(const CGFileType** self, const CGLoaderFile* file)
617 { return CGEvidenceDnbs_Make(self, file, &CGEvidenceDnbs20_vt); }
618
CGEvidenceDnbs22_Make(const CGFileType ** self,const CGLoaderFile * file)619 rc_t CC CGEvidenceDnbs22_Make(const CGFileType** self, const CGLoaderFile* file)
620 { return CGEvidenceDnbs_Make(self, file, &CGEvidenceDnbs20_vt); }
621
CGEvidenceDnbs25_Make(const CGFileType ** self,const CGLoaderFile * file)622 rc_t CC CGEvidenceDnbs25_Make(const CGFileType** self, const CGLoaderFile* file)
623 {
624 return CGEvidenceDnbs_Make(self, file, &CGEvidenceDnbs25_vt);
625 }
626