1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include <sra/rd-extern.h>
27
28 #include <klib/rc.h>
29 #include <sra/types.h>
30 #include <sra/abi.h>
31 #include <sysalloc.h>
32 #include <klib/text.h>
33
34 #include "reader-cmn.h"
35
36 #include <stdio.h>
37 #include <string.h>
38 #include <ctype.h>
39 #include <os-native.h>
40
/*
 * AbsolidReaderOptions
 *  Reader-specific option bits. AbsolidReaderInit ORs them together and
 *  hands the result to SRAReaderInit; afterwards they are tested against
 *  self->dad.options.
 */
typedef enum AbsolidReaderOptions_enum {
    eOrigFormat = 1 << 1,   /* 0x02: header without the ">accession.spot " part */
    eSignal     = 1 << 2,   /* 0x04: SIGNAL column requested */
    eClipQual   = 1 << 3    /* 0x08: apply TRIM_START / TRIM_LEN clipping */
} AbsolidReaderOptions;
46
47 /* column order is important here: see Init function below!!! */
48 static
49 const SRAReaderColumn AbsolidReader_master_columns_desc[] = {
50 {SRAREADER_COL_MANDATORY, "CSREAD", insdc_csfasta_t, NULL, NULL, 0},
51 {SRAREADER_COL_MANDATORY, "CS_KEY", insdc_fasta_t, NULL, NULL, 0},
52 {SRAREADER_COL_MANDATORY, "QUALITY", insdc_phred_t, NULL, NULL, 0},
53 {eClipQual, "TRIM_START", "INSDC:coord:zero", NULL, NULL, 0},
54 {eClipQual, "TRIM_LEN", "INSDC:coord:len", NULL, NULL, 0},
55 {eSignal | SRAREADER_COL_OPTIONAL, "SIGNAL", ncbi_fsamp4_t, NULL, NULL, 0},
56 {SRAREADER_COL_MANDATORY, NULL, NULL, NULL, NULL, 0} /* terminator */
57 };
58
59 struct AbsolidReader {
60 /* SRAReader always must be a first member! */
61 SRAReader dad;
62 uint32_t minReadLen;
63 /* current spot data shortcuts */
64 const SRAReaderColumn* csread;
65 const char** cs_key;
66 const SRAReaderColumn* qual1;
67 const INSDC_coord_zero** trim_start;
68 const INSDC_coord_len** trim_len;
69 const float** signal;
70 char prefix_buf[1024];
71 size_t prefix_sz;
72 };
73
74 static
AbsolidReaderInit(const AbsolidReader * self,bool origFormat,bool noClip,uint32_t minReadLen,bool signal)75 rc_t AbsolidReaderInit(const AbsolidReader* self,
76 bool origFormat, bool noClip, uint32_t minReadLen, bool signal)
77 {
78 rc_t rc = 0;
79 int options = origFormat ? eOrigFormat : 0;
80
81 CHECK_SELF(AbsolidReader);
82
83 options |= signal ? eSignal : 0;
84 options |= noClip ? 0 : eClipQual;
85 me->minReadLen = minReadLen;
86
87 if( (rc = SRAReaderInit(&self->dad, options, AbsolidReader_master_columns_desc)) == 0 &&
88 (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[0], &me->csread, NULL)) == 0 &&
89 (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[1], NULL, (const void***)&self->cs_key)) == 0 &&
90 (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[2], &me->qual1, NULL)) == 0 &&
91 (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[3], NULL, (const void***)&self->trim_start)) == 0 &&
92 (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[4], NULL, (const void***)&self->trim_len)) == 0 &&
93 (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[5], NULL, (const void***)&self->signal)) == 0 ) {
94 }
95 return rc;
96 }
97
AbsolidReaderMake(const AbsolidReader ** self,const SRATable * table,const char * accession,bool origFormat,bool noClip,uint32_t minReadLen,spotid_t minSpotId,spotid_t maxSpotId,bool signal)98 LIB_EXPORT rc_t CC AbsolidReaderMake(const AbsolidReader** self, const SRATable* table,
99 const char* accession, bool origFormat,
100 bool noClip, uint32_t minReadLen,
101 spotid_t minSpotId, spotid_t maxSpotId, bool signal)
102 {
103 rc_t rc = SRAReaderMake((const SRAReader**)self, sizeof **self, table, accession, minSpotId, maxSpotId);
104
105 if( rc == 0 ) {
106 rc = AbsolidReaderInit(*self, origFormat, noClip, minReadLen, signal);
107 }
108 if( rc != 0 ) {
109 AbsolidReaderWhack(*self);
110 *self = NULL;
111 }
112 return rc;
113 }
114
AbsolidReaderWhack(const AbsolidReader * self)115 LIB_EXPORT rc_t CC AbsolidReaderWhack(const AbsolidReader* self)
116 {
117 return SRAReaderWhack(&self->dad);
118 }
119
AbsolidReaderFirstSpot(const AbsolidReader * self)120 LIB_EXPORT rc_t CC AbsolidReaderFirstSpot(const AbsolidReader* self)
121 {
122 return SRAReaderFirstSpot(&self->dad);
123 }
124
AbsolidReaderSeekSpot(const AbsolidReader * self,spotid_t spot)125 LIB_EXPORT rc_t CC AbsolidReaderSeekSpot(const AbsolidReader* self, spotid_t spot)
126 {
127 return SRAReaderSeekSpot(&self->dad, spot);
128 }
129
AbsolidReaderNextSpot(const AbsolidReader * self)130 LIB_EXPORT rc_t CC AbsolidReaderNextSpot(const AbsolidReader* self)
131 {
132 return SRAReaderNextSpot(&self->dad);
133 }
134
AbsolidReaderCurrentSpot(const AbsolidReader * self,spotid_t * spot)135 LIB_EXPORT rc_t CC AbsolidReaderCurrentSpot(const AbsolidReader* self, spotid_t* spot)
136 {
137 return SRAReaderCurrentSpot(&self->dad, spot);
138 }
139
AbsolidReader_SpotInfo(const AbsolidReader * self,const char ** spotname,size_t * spotname_sz,uint32_t * spot_len,uint32_t * num_reads)140 LIB_EXPORT rc_t CC AbsolidReader_SpotInfo(const AbsolidReader* self,
141 const char** spotname, size_t* spotname_sz,
142 uint32_t* spot_len, uint32_t* num_reads)
143 {
144 rc_t rc = SRAReader_SpotInfo(&self->dad, spotname, spotname_sz, spot_len, num_reads);
145 if( rc == 0 ) {
146 if( spot_len ) {
147 if( self->dad.options & eClipQual ) {
148 *spot_len = **self->trim_len;
149 }
150 if( *spot_len < self->minReadLen ) {
151 *spot_len = 0;
152 }
153 }
154 }
155 return rc;
156 }
157
AbsolidReaderSpotName(const AbsolidReader * self,const char ** prefix,size_t * prefix_sz,const char ** suffix,size_t * suffix_sz)158 LIB_EXPORT rc_t CC AbsolidReaderSpotName(const AbsolidReader* self,
159 const char** prefix, size_t* prefix_sz,
160 const char** suffix, size_t* suffix_sz)
161 {
162 rc_t rc = 0;
163 const char* spotname;
164 size_t spotname_sz;
165
166 CHECK_SELF(AbsolidReader);
167
168 rc = SRAReader_SpotInfo(&self->dad, &spotname, &spotname_sz, NULL, NULL);
169 if( rc == 0 ) {
170 if( !self->prefix_sz || self->prefix_sz > spotname_sz || strncmp(spotname, self->prefix_buf, self->prefix_sz) != 0 ) {
171 if( spotname_sz == 0 ) {
172 me->prefix_sz = 0;
173 } else {
174 int k = 0;
175 size_t psz = spotname_sz;
176 while( psz > 0 && k < 3 ) {
177 /* take out PLATE_X_Y and optional label _(F|R)3 */
178 while( psz > 0 && isdigit( *(spotname + psz - 1)) ) {
179 psz--;
180 }
181
182 if( *(spotname + psz - 1) == 'F' || *(spotname + psz - 1) == 'R') {
183 /* Discard F|R and preceding underscore */
184 if( --psz > 0 && !isdigit(*(spotname + psz - 1)) ) {
185 psz--;
186 }
187 continue;
188 } else if( psz > 0 ) {
189 /* Discard underscore */
190 psz--;
191 k++;
192 }
193 }
194 if( psz > 0 ) {
195 /* Add one to restore underscore at end of prefix */
196 me->prefix_sz = psz + 1;
197 string_copy(me->prefix_buf, sizeof(me->prefix_buf), spotname, me->prefix_sz);
198 } else {
199 me->prefix_sz = 0;
200 }
201 }
202 me->prefix_buf[me->prefix_sz] = '\0';
203 }
204 if( suffix ) {
205 *suffix = &spotname[self->prefix_sz];
206 }
207 if( suffix_sz ) {
208 *suffix_sz = spotname_sz - self->prefix_sz;
209 }
210 if( prefix ) {
211 *prefix = self->prefix_buf;
212 }
213 if( prefix_sz ) {
214 *prefix_sz = self->prefix_sz;
215 }
216 }
217 return rc;
218
219 }
220
AbsolidReader_SpotReadInfo(const AbsolidReader * self,uint32_t readId,SRAReadTypes * read_type,const char ** read_label,INSDC_coord_len * read_label_sz,INSDC_coord_zero * read_start,INSDC_coord_len * read_len)221 LIB_EXPORT rc_t CC AbsolidReader_SpotReadInfo(const AbsolidReader* self, uint32_t readId, SRAReadTypes* read_type,
222 const char** read_label, INSDC_coord_len* read_label_sz,
223 INSDC_coord_zero* read_start, INSDC_coord_len* read_len)
224 {
225 INSDC_coord_zero rs;
226 INSDC_coord_len rl;
227
228 rc_t rc = SRAReader_SpotReadInfo(&self->dad, readId, read_type, read_label, read_label_sz, &rs, &rl);
229 if( rc == 0 ) {
230 if( read_start || read_len ) {
231 if( self->dad.options & eClipQual ) {
232 INSDC_coord_zero end = rs + rl - 1;
233 INSDC_coord_zero trim_end = ((**self->trim_start) + (**self->trim_len)) - 1;
234 if( end < (**self->trim_start) || rs > trim_end ) {
235 rl = 0;
236 } else {
237 if( (**self->trim_start) > rs && (**self->trim_start) <= end ) {
238 rl -= (**self->trim_start) - rs;
239 rs = (**self->trim_start);
240 }
241 if( end > trim_end ) {
242 rl = trim_end - rs + 1;
243 }
244 }
245 }
246 if( rl < self->minReadLen ) {
247 rl = 0;
248 }
249 if( read_start ) {
250 *read_start = rl ? rs : 0;
251 }
252 if( read_len ) {
253 *read_len = rl;
254 }
255 }
256 }
257 return rc;
258 }
259
AbsolidReaderHeader(const AbsolidReader * self,uint32_t readId,char * data,size_t dsize,size_t * written)260 LIB_EXPORT rc_t CC AbsolidReaderHeader(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
261 {
262 rc_t rc = 0;
263 int ret = 0;
264
265 const char* spotname;
266 size_t x;
267 int spotname_sz;
268 INSDC_coord_len read_label_sz = 0;
269 const char* read_label;
270
271 CHECK_SELF(AbsolidReader);
272 CHECK_SPOT(self->dad);
273
274 if( (rc = AbsolidReaderSpotName(me, NULL, NULL, &spotname, &x)) != 0 ) {
275 return rc;
276 }
277 spotname_sz = (int)x;
278 if( readId > 0 ) {
279 if( (rc = AbsolidReader_SpotReadInfo(self, readId, NULL, &read_label, &read_label_sz, NULL, NULL)) != 0 ) {
280 return rc;
281 }
282 }
283 if( self->dad.options & eOrigFormat ) {
284 char tmp[1024];
285 if( spotname_sz == 0 ) {
286 spotname_sz = snprintf(tmp, sizeof(tmp) - 1, "%s.%lld", self->dad.accession, ( long long int ) self->dad.spot);
287 if ( spotname_sz < 0 )
288 return RC ( rcSRA, rcString, rcConstructing, rcData, rcCorrupt );
289 spotname = tmp;
290 }
291 if( readId > 0 && read_label_sz > 0 ) {
292 ret = snprintf(data, dsize, ">%.*s%s%.*s", spotname_sz, spotname, spotname_sz ? "_" : "", read_label_sz, read_label);
293 } else {
294 ret = snprintf(data, dsize, ">%.*s", spotname_sz, spotname);
295 }
296 } else {
297 if( readId > 0 && read_label_sz > 0 ) {
298 ret = snprintf(data, dsize, ">%s.%lld %.*s%s%.*s",
299 self->dad.accession, ( long long int ) self->dad.spot, spotname_sz, spotname, spotname_sz ? "_" : "", read_label_sz, read_label);
300 } else {
301 ret = snprintf(data, dsize, ">%s.%lld %.*s",
302 self->dad.accession, ( long long int ) self->dad.spot, spotname_sz, spotname);
303 }
304 }
305 #if SNPRINTF_ACTUALLY_WORKED_THE_WAY_YOU_THINK
306 if( ret < 0 ) {
307 ret = 0;
308 rc = RC(rcSRA, rcString, rcConstructing, rcMessage, rcUnknown);
309 } else if( ret >= (int)dsize ) {
310 #else
311 if( ret < 0 || ret >= (int)dsize ) {
312 #endif
313 rc = RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
314 }
315 if( written != NULL ) {
316 *written = ret;
317 }
318 return rc;
319 }
320
321 LIB_EXPORT rc_t CC AbsolidReaderBase(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
322 {
323 rc_t rc = 0;
324 INSDC_coord_zero read_start = 0;
325 INSDC_coord_len read_len = 0;
326
327 CHECK_SELF(AbsolidReader);
328 CHECK_SPOT(self->dad);
329
330 if( readId > 0 ) {
331 if( (rc = AbsolidReader_SpotReadInfo(self, readId--, NULL, NULL, NULL, &read_start, &read_len)) != 0 ) {
332 return rc;
333 }
334 } else {
335 return RC(rcSRA, rcFormatter, rcConstructing, rcFormat, rcUnsupported);
336 }
337 if( read_len < self->minReadLen ) {
338 read_len = 0;
339 }
340 /* for cs_key */
341 read_len++;
342 if( written != NULL ) {
343 *written = read_len;
344 }
345 if( read_len >= dsize ) {
346 return RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
347 } else {
348 const char* b = self->csread->base;
349 data[0] = (*me->cs_key)[readId];
350 memmove(&data[1], &b[read_start], read_len - 1);
351 }
352 data[read_len] = '\0';
353 return rc;
354 }
355
356 LIB_EXPORT rc_t CC AbsolidReaderQuality(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
357 {
358 rc_t rc = 0;
359 INSDC_coord_zero read_start = 0;
360 INSDC_coord_len j = 0, read_len = 0;
361
362 CHECK_SELF(AbsolidReader);
363 CHECK_SPOT(self->dad);
364
365 if( readId > 0 ) {
366 if( (rc = AbsolidReader_SpotReadInfo(self, readId, NULL, NULL, NULL, &read_start, &read_len)) != 0 ) {
367 return rc;
368 }
369 } else {
370 return RC(rcSRA, rcFormatter, rcConstructing, rcFormat, rcUnsupported);
371 }
372 if( read_len >= self->minReadLen && me->qual1->size ) {
373 const int8_t* q = me->qual1->base;
374 char* d = data;
375 INSDC_coord_len i;
376
377 /* read end */
378 read_len += read_start;
379 for(i = read_start; i < read_len; i++) {
380 int x;
381 if( j + 2 > dsize ) {
382 /* do not overflow buffer in case it's too small */
383 d = data;
384 }
385 x = snprintf(d, dsize - j, "%i ", (int)(q[i]));
386 if ( x < 0 )
387 return RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
388 d += x;
389 j += x;
390 }
391 j--;
392 d[j] = '\0';
393 }
394 if( written != NULL ) {
395 *written = j;
396 }
397 if( j >= dsize ) {
398 rc = RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
399 }
400 return rc;
401 }
402
403 static
404 rc_t AbsolidReaderSignal(const AbsolidReader* self, uint32_t readId, int idx, char* data, size_t dsize, size_t* written)
405 {
406 rc_t rc = 0;
407 INSDC_coord_len j = 0;
408
409 CHECK_SELF(AbsolidReader);
410 CHECK_SPOT(self->dad);
411
412 if( me->signal != NULL ) {
413 INSDC_coord_zero read_start = 0;
414 INSDC_coord_len read_len = 0;
415 if( readId > 0 ) {
416 if( (rc = AbsolidReader_SpotReadInfo(self, readId, NULL, NULL, NULL, &read_start, &read_len)) != 0 ) {
417 return rc;
418 }
419 } else {
420 return RC(rcSRA, rcFormatter, rcConstructing, rcFormat, rcUnsupported);
421 }
422 if( read_len >= self->minReadLen ) {
423 const float* s = *me->signal;
424 char* d = data;
425 INSDC_coord_len i;
426
427 /* read end */
428 read_len += read_start;
429 for(i = read_start; i < read_len; i++) {
430 int x;
431 if( j + 9 > dsize ) {
432 /* do not overflow buffer in case it's too small */
433 d = data;
434 }
435 x = snprintf(d, dsize - j, "%.6g ", s[i * 4 + idx]);
436 if ( x < 0 )
437 return RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
438 d += x;
439 j += x;
440 }
441 *d = '\0';
442 --j;
443 }
444 }
445 if( written != NULL ) {
446 *written = j;
447 }
448 if( j >= dsize ) {
449 rc = RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
450 }
451 return rc;
452
453 }
454
455 LIB_EXPORT rc_t CC AbsolidReaderSignalFTC(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
456 {
457 return AbsolidReaderSignal(self, readId, 0, data, dsize, written);
458 }
459
460 LIB_EXPORT rc_t CC AbsolidReaderSignalCY3(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
461 {
462 return AbsolidReaderSignal(self, readId, 1, data, dsize, written);
463 }
464
465 LIB_EXPORT rc_t CC AbsolidReaderSignalTXR(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
466 {
467 return AbsolidReaderSignal(self, readId, 2, data, dsize, written);
468 }
469
470 LIB_EXPORT rc_t CC AbsolidReaderSignalCY5(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
471 {
472 return AbsolidReaderSignal(self, readId, 3, data, dsize, written);
473 }
474