1 /*===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 
27 #include <unistd.h>
28 
29 #include <klib/rc.h>
30 
31 #include <insdc/sra.h>
32 
33 #include "id2name.h"
34 
/* Opaque forward declarations: this header only passes pointers to these
 * types (see SpotAssemblerWriteSoloFragments), so their full definitions
 * are not needed here.
 */
struct SequenceWriter;
struct KLoadProgressbar;
37 
38 /*--------------------------------------------------------------------------
39  * ctx_value_t, FragmentInfo
40  */
/* ctx_value_t
 *  Small fixed-size record addressed by key id; pointers to these records
 *  are handed out by SpotAssemblerGetCtxValue.
 *
 *  Field order and types are left exactly as they were so the in-memory
 *  layout does not change.
 */
typedef struct {
    /* spotId is deliberately not stored; CTX_VALUE_SET_S_ID is the matching
     * no-op setter. */
/*    uint64_t spotId; */
    int64_t fragmentOffset;  /* NOTE(review): presumably the location of the saved fragment - confirm */
    uint16_t fragmentSize;   /* NOTE(review): presumably the size of the saved fragment - confirm */
    uint16_t seqHash[2];     /* two 16-bit hashes; NOTE(review): presumably one per read, see SeqHashKey - confirm */
    /* three one-bit flags sharing a single uint8_t declaration */
    uint8_t  unmated: 1,
             has_a_read: 1,
             written: 1;
} ctx_value_t;
50 
/* CTX_VALUE_SET_S_ID( A, B )
 *  No-op setter: A is never expanded and B is evaluated once and discarded
 *  (the void cast silences unused-value warnings). Nothing is stored.
 */
#define CTX_VALUE_SET_S_ID(A, B) ((void)(B))
52 
/* FragmentInfo
 *  Fixed-size descriptor of one fragment; FragmentEntry.data points at it.
 *  Field order and types are unchanged so the layout stays the same.
 *  NOTE(review): the per-field meanings below are inferred from the names
 *  only - confirm against the code that fills this structure.
 */
typedef struct FragmentInfo
{
    uint32_t readlen;      /* presumably the read length */
    uint8_t  is_bad;       /* presumably a boolean "bad read" flag */
    uint8_t  orientation;
    uint8_t  otherReadNo;  /* presumably the read number of the mate */
    uint8_t  sglen;        /* presumably the spot-group name length */
    uint8_t  cskey;        /* presumably the color-space key */
} FragmentInfo;
62 
63 typedef struct {
64     int64_t id;
65     FragmentInfo *data;
66 } FragmentEntry;
67 
68 /*--------------------------------------------------------------------------
69  * SpotAssembler
70  */
71 
/* Number of FragmentEntry slots in SpotAssembler.fragment (1024 * 1024). */
#define FRAGMENT_HOT_COUNT (1024u * 1024u)
/* Number of id spaces; sizes every per-id-space array in SpotAssembler. */
#define NUM_ID_SPACES (256u)
74 
75 typedef struct SpotAssembler
76 {
77     /* settings */
78     size_t cache_size;
79     const char * tmpfs;
80     uint64_t pid;
81 
82     struct KBTree *key2id[NUM_ID_SPACES];
83     char *key2id_names;
84 
85     struct MMArray *id2value;
86     int64_t spotId;
87     int64_t nextFragment;
88 
89     FragmentEntry * fragment; /* [FRAGMENT_HOT_COUNT] */
90 
91     Id2name id2name; /* idKey -> readname */
92 
93     uint32_t idCount[NUM_ID_SPACES];
94     uint32_t key2id_hash[NUM_ID_SPACES];
95 
96     size_t key2id_max;
97     size_t key2id_name_max;
98     size_t key2id_name_alloc;
99     size_t key2id_count;
100 
101     size_t key2id_name[NUM_ID_SPACES];
102     /* this array is kept in name order */
103     /* this maps the names to key2id and idCount */
104     size_t key2id_oid[NUM_ID_SPACES];
105 
106     int fragmentFd;
107 } SpotAssembler;
108 
109 rc_t SpotAssemblerMake(SpotAssembler **ctx, size_t cache_size, const char * tmpfs, uint64_t pid);
110 void SpotAssemblerRelease(SpotAssembler * ctx);
111 
112 ctx_value_t * SpotAssemblerGetCtxValue(SpotAssembler * self, rc_t *const prc, uint64_t const keyId);
113 
114 FragmentEntry * SpotAssemblerGetFragmentEntry(SpotAssembler * self, uint64_t keyId);
115 
116 rc_t SpotAssemblerGetKeyID(SpotAssembler *const ctx,
117                            uint64_t *const rslt,
118                            bool *const wasInserted,
119                            char const key[],
120                            char const name[],
121                            size_t const o_namelen);
122 
/* SeqHashKey
 *  Computes an unsigned hash over the `keylen` bytes starting at `key`.
 *  NOTE(review): ctx_value_t.seqHash holds 16-bit values, so callers
 *  presumably truncate the result - confirm.
 */
unsigned SeqHashKey(void const *const key, size_t const keylen);
124 
125 void SpotAssemblerReleaseMemBank(SpotAssembler *ctx);
126 
127 rc_t SpotAssemblerWriteSoloFragments(SpotAssembler * ctx,
128                                      bool isColorSpace,
129                                      INSDC_SRA_platform_id platform,
130                                      bool keepMismatchQual,
131                                      bool no_real_output,
132                                      bool hasTI,
133                                      const char * QualQuantizer,
134                                      bool dropReadnames,
135                                      struct SequenceWriter * seq,
136                                      const struct KLoadProgressbar *progress);
137 
138