1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include <sra/rd-extern.h>
27 
28 #include <klib/rc.h>
29 #include <sra/types.h>
30 #include <sra/abi.h>
31 #include <sysalloc.h>
32 #include <klib/text.h>
33 
34 #include "reader-cmn.h"
35 
36 #include <stdio.h>
37 #include <string.h>
38 #include <ctype.h>
39 #include <os-native.h>
40 
/* Option bits specific to the ABSOLiD reader.
 * They are OR-ed together in AbsolidReaderInit() and later tested against
 * the base reader's `options` field.
 * NOTE(review): bit 0 (0x01) is not used here - presumably it is reserved for
 * the generic column flags declared in reader-cmn.h; confirm there. */
typedef enum AbsolidReaderOptions_enum {
    eOrigFormat  = 1 << 1,  /* 0x02: header is printed in the original (name-only) format */
    eSignal      = 1 << 2,  /* 0x04: the SIGNAL column is requested */
    eClipQual    = 1 << 3   /* 0x08: TRIM_START/TRIM_LEN clipping is applied */
} AbsolidReaderOptions;
46 
/* Master column table passed to SRAReaderInit().
 *
 * Column order is important here: AbsolidReaderInit() below looks every
 * entry up by its array index (0..5), so entries must not be reordered,
 * inserted or removed without updating those indices.
 *
 * The first field of each entry carries the option bits / SRAREADER_COL_*
 * flags for that column; the exact interpretation is defined by the base
 * reader (reader-cmn.h) - presumably a column tagged with an option bit is
 * only used when that option is set; confirm against SRAReaderInit(). */
static
const SRAReaderColumn AbsolidReader_master_columns_desc[] = {
    {SRAREADER_COL_MANDATORY, "CSREAD", insdc_csfasta_t, NULL, NULL, 0},           /* [0] color-space read */
    {SRAREADER_COL_MANDATORY, "CS_KEY", insdc_fasta_t, NULL, NULL, 0},             /* [1] one key character per read */
    {SRAREADER_COL_MANDATORY, "QUALITY", insdc_phred_t, NULL, NULL, 0},            /* [2] quality values */
    {eClipQual, "TRIM_START", "INSDC:coord:zero", NULL, NULL, 0},                  /* [3] clip start, tied to eClipQual */
    {eClipQual, "TRIM_LEN", "INSDC:coord:len", NULL, NULL, 0},                     /* [4] clip length, tied to eClipQual */
    {eSignal | SRAREADER_COL_OPTIONAL, "SIGNAL", ncbi_fsamp4_t, NULL, NULL, 0},    /* [5] 4-channel signal, tied to eSignal */
    {SRAREADER_COL_MANDATORY, NULL, NULL, NULL, NULL, 0} /* terminator */
};
58 
/* ABSOLiD reader object: the generic SRAReader state followed by
 * ABSOLiD-specific settings and shortcuts into the current spot's column
 * data. The shortcuts are filled in once by AbsolidReaderInit(). */
struct AbsolidReader {
    /* SRAReader always must be a first member! */
    SRAReader dad;
    /* reads shorter than this are reported with zero length */
    uint32_t minReadLen;
    /* current spot data shortcuts */
    const SRAReaderColumn* csread;          /* CSREAD column descriptor (its `base` is read in AbsolidReaderBase) */
    const char** cs_key;                    /* CS_KEY data, indexed by zero-based read number */
    const SRAReaderColumn* qual1;           /* QUALITY column descriptor (`base` and `size` are read) */
    const INSDC_coord_zero** trim_start;    /* TRIM_START value; dereferenced only when eClipQual is set */
    const INSDC_coord_len** trim_len;       /* TRIM_LEN value; dereferenced only when eClipQual is set */
    const float** signal;                   /* SIGNAL data, 4 floats per position; checked against NULL before use */
    /* cached spot-name prefix computed by AbsolidReaderSpotName();
       reused while successive spot names keep starting with it */
    char prefix_buf[1024];
    size_t prefix_sz;                       /* number of valid bytes in prefix_buf, 0 when there is no prefix */
};
73 
74 static
AbsolidReaderInit(const AbsolidReader * self,bool origFormat,bool noClip,uint32_t minReadLen,bool signal)75 rc_t AbsolidReaderInit(const AbsolidReader* self,
76                        bool origFormat, bool noClip, uint32_t minReadLen, bool signal)
77 {
78     rc_t rc = 0;
79     int options = origFormat ? eOrigFormat : 0;
80 
81     CHECK_SELF(AbsolidReader);
82 
83     options |= signal ? eSignal : 0;
84     options |= noClip ? 0 : eClipQual;
85     me->minReadLen = minReadLen;
86 
87     if( (rc = SRAReaderInit(&self->dad, options, AbsolidReader_master_columns_desc)) == 0 &&
88         (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[0], &me->csread, NULL)) == 0 &&
89         (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[1], NULL, (const void***)&self->cs_key)) == 0 &&
90         (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[2], &me->qual1, NULL)) == 0 &&
91         (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[3], NULL, (const void***)&self->trim_start)) == 0 &&
92         (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[4], NULL, (const void***)&self->trim_len)) == 0 &&
93         (rc = SRAReader_FindColData(&self->dad, &AbsolidReader_master_columns_desc[5], NULL, (const void***)&self->signal)) == 0 ) {
94     }
95     return rc;
96 }
97 
AbsolidReaderMake(const AbsolidReader ** self,const SRATable * table,const char * accession,bool origFormat,bool noClip,uint32_t minReadLen,spotid_t minSpotId,spotid_t maxSpotId,bool signal)98 LIB_EXPORT rc_t CC AbsolidReaderMake(const AbsolidReader** self, const SRATable* table,
99                                      const char* accession, bool origFormat,
100                                      bool noClip, uint32_t minReadLen,
101                                      spotid_t minSpotId, spotid_t maxSpotId, bool signal)
102 {
103     rc_t rc = SRAReaderMake((const SRAReader**)self, sizeof **self, table, accession, minSpotId, maxSpotId);
104 
105     if( rc == 0 ) {
106         rc = AbsolidReaderInit(*self, origFormat, noClip, minReadLen, signal);
107     }
108     if( rc != 0 ) {
109         AbsolidReaderWhack(*self);
110         *self = NULL;
111     }
112     return rc;
113 }
114 
AbsolidReaderWhack(const AbsolidReader * self)115 LIB_EXPORT rc_t CC AbsolidReaderWhack(const AbsolidReader* self)
116 {
117     return SRAReaderWhack(&self->dad);
118 }
119 
AbsolidReaderFirstSpot(const AbsolidReader * self)120 LIB_EXPORT rc_t CC AbsolidReaderFirstSpot(const AbsolidReader* self)
121 {
122     return SRAReaderFirstSpot(&self->dad);
123 }
124 
AbsolidReaderSeekSpot(const AbsolidReader * self,spotid_t spot)125 LIB_EXPORT rc_t CC AbsolidReaderSeekSpot(const AbsolidReader* self, spotid_t spot)
126 {
127     return SRAReaderSeekSpot(&self->dad, spot);
128 }
129 
AbsolidReaderNextSpot(const AbsolidReader * self)130 LIB_EXPORT rc_t CC AbsolidReaderNextSpot(const AbsolidReader* self)
131 {
132     return SRAReaderNextSpot(&self->dad);
133 }
134 
AbsolidReaderCurrentSpot(const AbsolidReader * self,spotid_t * spot)135 LIB_EXPORT rc_t CC AbsolidReaderCurrentSpot(const AbsolidReader* self, spotid_t* spot)
136 {
137     return SRAReaderCurrentSpot(&self->dad, spot);
138 }
139 
AbsolidReader_SpotInfo(const AbsolidReader * self,const char ** spotname,size_t * spotname_sz,uint32_t * spot_len,uint32_t * num_reads)140 LIB_EXPORT rc_t CC AbsolidReader_SpotInfo(const AbsolidReader* self,
141                                           const char** spotname, size_t* spotname_sz,
142                                           uint32_t* spot_len, uint32_t* num_reads)
143 {
144     rc_t rc = SRAReader_SpotInfo(&self->dad, spotname, spotname_sz, spot_len, num_reads);
145     if( rc == 0 ) {
146         if( spot_len ) {
147             if( self->dad.options & eClipQual ) {
148                 *spot_len = **self->trim_len;
149             }
150             if( *spot_len < self->minReadLen ) {
151                 *spot_len = 0;
152             }
153         }
154     }
155     return rc;
156 }
157 
AbsolidReaderSpotName(const AbsolidReader * self,const char ** prefix,size_t * prefix_sz,const char ** suffix,size_t * suffix_sz)158 LIB_EXPORT rc_t CC AbsolidReaderSpotName(const AbsolidReader* self,
159                                          const char** prefix, size_t* prefix_sz,
160                                          const char** suffix, size_t* suffix_sz)
161 {
162     rc_t rc = 0;
163     const char* spotname;
164     size_t spotname_sz;
165 
166     CHECK_SELF(AbsolidReader);
167 
168     rc = SRAReader_SpotInfo(&self->dad, &spotname, &spotname_sz, NULL, NULL);
169     if( rc == 0 ) {
170         if( !self->prefix_sz || self->prefix_sz > spotname_sz || strncmp(spotname, self->prefix_buf, self->prefix_sz) != 0 ) {
171             if( spotname_sz == 0 ) {
172                 me->prefix_sz = 0;
173             } else {
174                 int k = 0;
175                 size_t psz = spotname_sz;
176                 while( psz > 0 && k < 3 ) {
177                     /* take out PLATE_X_Y and optional label _(F|R)3 */
178                     while( psz > 0 && isdigit( *(spotname + psz - 1)) ) {
179                         psz--;
180                     }
181 
182                     if( *(spotname + psz - 1) == 'F' || *(spotname + psz - 1) == 'R') {
183                         /* Discard F|R and preceding underscore */
184                         if( --psz > 0 && !isdigit(*(spotname + psz - 1)) ) {
185                             psz--;
186                         }
187                         continue;
188                     } else if( psz > 0 ) {
189                         /* Discard underscore */
190                         psz--;
191                         k++;
192                     }
193                 }
194                 if( psz > 0 ) {
195                     /* Add one to restore underscore at end of prefix */
196                     me->prefix_sz = psz + 1;
197                     string_copy(me->prefix_buf, sizeof(me->prefix_buf), spotname, me->prefix_sz);
198                 } else {
199                     me->prefix_sz = 0;
200                 }
201             }
202             me->prefix_buf[me->prefix_sz] = '\0';
203         }
204         if( suffix ) {
205             *suffix = &spotname[self->prefix_sz];
206         }
207         if( suffix_sz ) {
208             *suffix_sz = spotname_sz - self->prefix_sz;
209         }
210         if( prefix ) {
211             *prefix = self->prefix_buf;
212         }
213         if( prefix_sz ) {
214             *prefix_sz = self->prefix_sz;
215         }
216     }
217     return rc;
218 
219 }
220 
AbsolidReader_SpotReadInfo(const AbsolidReader * self,uint32_t readId,SRAReadTypes * read_type,const char ** read_label,INSDC_coord_len * read_label_sz,INSDC_coord_zero * read_start,INSDC_coord_len * read_len)221 LIB_EXPORT rc_t CC AbsolidReader_SpotReadInfo(const AbsolidReader* self, uint32_t readId, SRAReadTypes* read_type,
222                                               const char** read_label, INSDC_coord_len* read_label_sz,
223                                               INSDC_coord_zero* read_start, INSDC_coord_len* read_len)
224 {
225     INSDC_coord_zero rs;
226     INSDC_coord_len rl;
227 
228     rc_t rc = SRAReader_SpotReadInfo(&self->dad, readId, read_type, read_label, read_label_sz, &rs, &rl);
229     if( rc == 0 ) {
230         if( read_start || read_len ) {
231             if( self->dad.options & eClipQual ) {
232                 INSDC_coord_zero end = rs + rl - 1;
233                 INSDC_coord_zero trim_end = ((**self->trim_start) + (**self->trim_len)) - 1;
234                 if( end < (**self->trim_start) || rs > trim_end ) {
235                     rl = 0;
236                 } else {
237                     if( (**self->trim_start) > rs && (**self->trim_start) <= end ) {
238                         rl -= (**self->trim_start) - rs;
239                         rs = (**self->trim_start);
240                     }
241                     if( end > trim_end ) {
242                         rl = trim_end - rs + 1;
243                     }
244                 }
245             }
246             if( rl < self->minReadLen ) {
247                 rl = 0;
248             }
249             if( read_start ) {
250                 *read_start = rl ? rs : 0;
251             }
252             if( read_len ) {
253                 *read_len = rl;
254             }
255         }
256     }
257     return rc;
258 }
259 
AbsolidReaderHeader(const AbsolidReader * self,uint32_t readId,char * data,size_t dsize,size_t * written)260 LIB_EXPORT rc_t CC AbsolidReaderHeader(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
261 {
262     rc_t rc = 0;
263     int ret = 0;
264 
265     const char* spotname;
266     size_t x;
267     int spotname_sz;
268     INSDC_coord_len read_label_sz = 0;
269     const char* read_label;
270 
271     CHECK_SELF(AbsolidReader);
272     CHECK_SPOT(self->dad);
273 
274     if( (rc = AbsolidReaderSpotName(me, NULL, NULL, &spotname, &x)) != 0 ) {
275         return rc;
276     }
277     spotname_sz = (int)x;
278     if( readId > 0 ) {
279         if( (rc = AbsolidReader_SpotReadInfo(self, readId, NULL, &read_label, &read_label_sz, NULL, NULL)) != 0 ) {
280             return rc;
281         }
282     }
283     if( self->dad.options & eOrigFormat ) {
284         char tmp[1024];
285         if( spotname_sz == 0 ) {
286             spotname_sz = snprintf(tmp, sizeof(tmp) - 1, "%s.%lld", self->dad.accession, ( long long int ) self->dad.spot);
287             if ( spotname_sz < 0 )
288                 return RC ( rcSRA, rcString, rcConstructing, rcData, rcCorrupt );
289             spotname = tmp;
290         }
291         if( readId > 0 && read_label_sz > 0 ) {
292             ret = snprintf(data, dsize, ">%.*s%s%.*s", spotname_sz, spotname, spotname_sz ? "_" : "", read_label_sz, read_label);
293         } else {
294             ret = snprintf(data, dsize, ">%.*s", spotname_sz, spotname);
295         }
296     } else {
297         if( readId > 0 && read_label_sz > 0 ) {
298             ret = snprintf(data, dsize, ">%s.%lld %.*s%s%.*s",
299                 self->dad.accession, ( long long int ) self->dad.spot, spotname_sz, spotname, spotname_sz ? "_" : "", read_label_sz, read_label);
300         } else {
301             ret = snprintf(data, dsize, ">%s.%lld %.*s",
302                            self->dad.accession, ( long long int ) self->dad.spot, spotname_sz, spotname);
303         }
304     }
305 #if SNPRINTF_ACTUALLY_WORKED_THE_WAY_YOU_THINK
306     if( ret < 0 ) {
307         ret = 0;
308         rc = RC(rcSRA, rcString, rcConstructing, rcMessage, rcUnknown);
309     } else if( ret >= (int)dsize ) {
310 #else
311     if( ret < 0 || ret >= (int)dsize ) {
312 #endif
313         rc = RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
314     }
315     if( written != NULL ) {
316         *written = ret;
317     }
318     return rc;
319 }
320 
321 LIB_EXPORT rc_t CC AbsolidReaderBase(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
322 {
323     rc_t rc = 0;
324     INSDC_coord_zero read_start = 0;
325     INSDC_coord_len read_len = 0;
326 
327     CHECK_SELF(AbsolidReader);
328     CHECK_SPOT(self->dad);
329 
330     if( readId > 0 ) {
331         if( (rc = AbsolidReader_SpotReadInfo(self, readId--, NULL, NULL, NULL, &read_start, &read_len)) != 0 ) {
332             return rc;
333         }
334     } else {
335         return RC(rcSRA, rcFormatter, rcConstructing, rcFormat, rcUnsupported);
336     }
337     if( read_len < self->minReadLen ) {
338         read_len = 0;
339     }
340     /* for cs_key */
341     read_len++;
342     if( written != NULL ) {
343         *written = read_len;
344     }
345     if( read_len >= dsize ) {
346         return RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
347     } else {
348         const char* b = self->csread->base;
349         data[0] = (*me->cs_key)[readId];
350         memmove(&data[1], &b[read_start], read_len - 1);
351     }
352     data[read_len] = '\0';
353     return rc;
354 }
355 
356 LIB_EXPORT rc_t CC AbsolidReaderQuality(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
357 {
358     rc_t rc = 0;
359     INSDC_coord_zero read_start = 0;
360     INSDC_coord_len j = 0, read_len = 0;
361 
362     CHECK_SELF(AbsolidReader);
363     CHECK_SPOT(self->dad);
364 
365     if( readId > 0 ) {
366         if( (rc = AbsolidReader_SpotReadInfo(self, readId, NULL, NULL, NULL, &read_start, &read_len)) != 0 ) {
367             return rc;
368         }
369     } else {
370         return RC(rcSRA, rcFormatter, rcConstructing, rcFormat, rcUnsupported);
371     }
372     if( read_len >= self->minReadLen && me->qual1->size ) {
373         const int8_t* q = me->qual1->base;
374         char* d = data;
375         INSDC_coord_len i;
376 
377         /* read end */
378         read_len += read_start;
379         for(i = read_start; i < read_len; i++) {
380             int x;
381             if( j + 2 > dsize ) {
382                 /* do not overflow buffer in case it's too small */
383                 d = data;
384             }
385             x = snprintf(d, dsize - j, "%i ", (int)(q[i]));
386             if ( x < 0 )
387                 return RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
388             d += x;
389             j += x;
390         }
391         j--;
392         d[j] = '\0';
393     }
394     if( written != NULL ) {
395         *written = j;
396     }
397     if( j >= dsize ) {
398         rc = RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
399     }
400     return rc;
401 }
402 
403 static
404 rc_t AbsolidReaderSignal(const AbsolidReader* self, uint32_t readId, int idx, char* data, size_t dsize, size_t* written)
405 {
406     rc_t rc = 0;
407     INSDC_coord_len j = 0;
408 
409     CHECK_SELF(AbsolidReader);
410     CHECK_SPOT(self->dad);
411 
412     if( me->signal != NULL ) {
413         INSDC_coord_zero read_start = 0;
414         INSDC_coord_len read_len = 0;
415         if( readId > 0 ) {
416             if( (rc = AbsolidReader_SpotReadInfo(self, readId, NULL, NULL, NULL, &read_start, &read_len)) != 0 ) {
417                 return rc;
418             }
419         } else {
420             return RC(rcSRA, rcFormatter, rcConstructing, rcFormat, rcUnsupported);
421         }
422         if( read_len >= self->minReadLen ) {
423             const float* s = *me->signal;
424             char* d = data;
425             INSDC_coord_len i;
426 
427             /* read end */
428             read_len += read_start;
429             for(i = read_start; i < read_len; i++) {
430                 int x;
431                 if( j + 9 > dsize ) {
432                     /* do not overflow buffer in case it's too small */
433                     d = data;
434                 }
435                 x = snprintf(d, dsize - j, "%.6g ", s[i * 4 + idx]);
436                 if ( x < 0 )
437                     return RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
438                 d += x;
439                 j += x;
440             }
441             *d = '\0';
442             --j;
443         }
444     }
445     if( written != NULL ) {
446         *written = j;
447     }
448     if( j >= dsize ) {
449         rc = RC(rcSRA, rcString, rcConstructing, rcMemory, rcInsufficient);
450     }
451     return rc;
452 
453 }
454 
455 LIB_EXPORT rc_t CC AbsolidReaderSignalFTC(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
456 {
457     return AbsolidReaderSignal(self, readId, 0, data, dsize, written);
458 }
459 
460 LIB_EXPORT rc_t CC AbsolidReaderSignalCY3(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
461 {
462     return AbsolidReaderSignal(self, readId, 1, data, dsize, written);
463 }
464 
465 LIB_EXPORT rc_t CC AbsolidReaderSignalTXR(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
466 {
467     return AbsolidReaderSignal(self, readId, 2, data, dsize, written);
468 }
469 
470 LIB_EXPORT rc_t CC AbsolidReaderSignalCY5(const AbsolidReader* self, uint32_t readId, char* data, size_t dsize, size_t* written)
471 {
472     return AbsolidReaderSignal(self, readId, 3, data, dsize, written);
473 }
474