1
2 /******************************************************************************
3 *
4 * This file is part of canu, a software program that assembles whole-genome
5 * sequencing reads into contigs.
6 *
7 * This software is based on:
8 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9 * the 'kmer package' r1994 (http://kmer.sourceforge.net)
10 *
11 * Except as indicated otherwise, this is a 'United States Government Work',
12 * and is released in the public domain.
13 *
14 * File 'README.licenses' in the root directory of this distribution
15 * contains full conditions and disclaimers.
16 */
17
18 #include "runtime.H"
19 #include "files.H"
20
21 #include "bed.H"
22
23
24
//  Convert a canu-style sequence name into its numeric tig ID.
//
//  A canu name is one of the prefixes "tig", "ctg" or "utg" followed
//  immediately by a decimal number, e.g. "tig00000042" -> 42.  Any
//  characters after the digits are ignored.
//
//  Returns UINT32_MAX when 'name' is NULL, does not start with one of the
//  prefixes, has no digit directly after the prefix, or encodes a number
//  that does not fit below UINT32_MAX.
static
uint32
nameToCanuID(char *name) {
  static const char *prefixes[3] = { "tig", "ctg", "utg" };

  if (name == NULL)
    return(UINT32_MAX);

  for (int pp=0; pp<3; pp++) {
    //  strncmp stops at the terminating NUL, so names shorter than the
    //  prefix are compared safely.
    if (strncmp(name, prefixes[pp], 3) != 0)
      continue;

    //  Without this check a name such as "tigger" would parse as tig 0.
    if ((name[3] < '0') || (name[3] > '9'))
      return(UINT32_MAX);

    //  Parse wide, then range check, so an oversized number is reported as
    //  'no ID' instead of being truncated to some unrelated ID.
    unsigned long long value = strtoull(name + 3, NULL, 10);

    if (value >= UINT32_MAX)
      return(UINT32_MAX);

    return(static_cast<uint32>(value));
  }

  return(UINT32_MAX);
}
48
49
50
bedRecord()51 bedRecord::bedRecord() {
52 _Aname = NULL;
53 _Aid = UINT32_MAX;
54
55 _bgn = UINT32_MAX;
56 _end = 0;
57
58 _Bname = NULL;
59 _Bid = UINT32_MAX;
60
61 _score = 0;
62 _Bfwd = false;
63 }
64
65
bedRecord(char * inLine)66 bedRecord::bedRecord(char *inLine) {
67 load(inLine);
68 }
69
70
~bedRecord()71 bedRecord::~bedRecord() {
72 delete [] _Aname;
73 delete [] _Bname;
74 }
75
76
77 void
load(char * inLine)78 bedRecord::load(char *inLine) {
79 splitToWords W(inLine);
80
81 _Aname = new char [strlen(W[0]) + 1];
82 _Aid = UINT32_MAX;
83
84 _bgn = W.toint32(1);
85 _end = W.toint32(2);
86
87 _Bname = new char [strlen(W[3]) + 1];
88 _Bid = UINT32_MAX;
89
90 _score = W.touint32(4);
91 _Bfwd = W[5][0] == '+';
92
93 strcpy(_Aname, W[0]);
94 strcpy(_Bname, W[3]);
95
96 _Aid = nameToCanuID(_Aname); // Search for canu-specific names, and convert to tigID's.
97 _Bid = nameToCanuID(_Bname);
98 }
99
100
101 void
save(FILE * outFile)102 bedRecord::save(FILE *outFile) {
103 fprintf(outFile, "%s\t%d\t%d\t%s\t%u\t%c\n",
104 _Aname, _bgn, _end, _Bname, _score, (_Bfwd == true) ? '+' : '-');
105 }
106
107
108
bedFile(char * inName)109 bedFile::bedFile(char *inName) {
110 loadFile(inName);
111 }
112
113
~bedFile()114 bedFile::~bedFile() {
115 for (uint32 ii=0; ii<_records.size(); ii++)
116 delete _records[ii];
117 }
118
119
120 bool
loadFile(char * inName)121 bedFile::loadFile(char *inName) {
122 char *L = NULL;
123 uint32 Llen = 0;
124 uint32 Lmax = 0;
125
126 FILE *F = AS_UTL_openInputFile(inName);
127
128 while (AS_UTL_readLine(L, Llen, Lmax, F)) {
129 _records.push_back(new bedRecord(L));
130 }
131
132 AS_UTL_closeFile(F, inName);
133
134 delete [] L;
135
136 fprintf(stderr, "bed: Loaded " F_SIZE_T " records.\n", _records.size());
137
138 return(true);
139 }
140
141
142
143
144 bool
saveFile(char * outName)145 bedFile::saveFile(char *outName) {
146
147 FILE *F = AS_UTL_openOutputFile(outName);
148
149 for (uint32 ii=0; ii<_records.size(); ii++)
150 if (_records[ii])
151 _records[ii]->save(F);
152
153 AS_UTL_closeFile(F, outName);
154
155 return(true);
156 }
157
158