1 /*=========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * =========================================================================== 24 * 25 */ 26 27 #include <unistd.h> 28 29 #include <klib/rc.h> 30 31 #include <insdc/sra.h> 32 33 #include "id2name.h" 34 35 struct SequenceWriter; 36 struct KLoadProgressbar; 37 38 /*-------------------------------------------------------------------------- 39 * ctx_value_t, FragmentInfo 40 */ 41 typedef struct { 42 /* uint64_t spotId; */ 43 int64_t fragmentOffset; 44 uint16_t fragmentSize; 45 uint16_t seqHash[2]; 46 uint8_t unmated: 1, 47 has_a_read: 1, 48 written: 1; 49 } ctx_value_t; 50 51 #define CTX_VALUE_SET_S_ID(A, B) ((void)(B)) 52 53 typedef struct FragmentInfo 54 { 55 uint32_t readlen; 56 uint8_t is_bad; 57 uint8_t orientation; 58 uint8_t otherReadNo; 59 uint8_t sglen; 60 uint8_t cskey; 61 } FragmentInfo; 62 63 typedef struct { 64 int64_t id; 65 FragmentInfo *data; 66 } FragmentEntry; 67 68 /*-------------------------------------------------------------------------- 69 * SpotAssembler 70 */ 71 72 #define FRAGMENT_HOT_COUNT (1024u * 1024u) 73 #define NUM_ID_SPACES (256u) 74 75 typedef struct SpotAssembler 76 { 77 /* settings */ 78 size_t cache_size; 79 const char * tmpfs; 80 uint64_t pid; 81 82 struct KBTree *key2id[NUM_ID_SPACES]; 83 char *key2id_names; 84 85 struct MMArray *id2value; 86 int64_t spotId; 87 int64_t nextFragment; 88 89 FragmentEntry * fragment; /* [FRAGMENT_HOT_COUNT] */ 90 91 Id2name id2name; /* idKey -> readname */ 92 93 uint32_t idCount[NUM_ID_SPACES]; 94 uint32_t key2id_hash[NUM_ID_SPACES]; 95 96 size_t key2id_max; 97 size_t key2id_name_max; 98 size_t key2id_name_alloc; 99 size_t key2id_count; 100 101 size_t key2id_name[NUM_ID_SPACES]; 102 /* this array is kept in name order */ 103 /* this maps the names to key2id and idCount */ 104 size_t key2id_oid[NUM_ID_SPACES]; 105 106 int fragmentFd; 107 } SpotAssembler; 108 109 rc_t SpotAssemblerMake(SpotAssembler **ctx, size_t cache_size, const char * tmpfs, uint64_t pid); 110 void SpotAssemblerRelease(SpotAssembler * ctx); 111 112 ctx_value_t * SpotAssemblerGetCtxValue(SpotAssembler * self, rc_t *const prc, uint64_t const keyId); 113 114 FragmentEntry * SpotAssemblerGetFragmentEntry(SpotAssembler * self, uint64_t keyId); 115 116 rc_t SpotAssemblerGetKeyID(SpotAssembler *const ctx, 117 uint64_t *const rslt, 118 bool *const wasInserted, 119 char const key[], 120 char const name[], 121 size_t const o_namelen); 122 123 unsigned SeqHashKey(void const *const key, size_t const keylen); 124 125 void SpotAssemblerReleaseMemBank(SpotAssembler *ctx); 126 127 rc_t SpotAssemblerWriteSoloFragments(SpotAssembler * ctx, 128 bool isColorSpace, 129 INSDC_SRA_platform_id platform, 130 bool keepMismatchQual, 131 bool no_real_output, 132 bool hasTI, 133 const char * QualQuantizer, 134 bool dropReadnames, 135 struct SequenceWriter * seq, 136 const struct KLoadProgressbar *progress); 137 138