1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "runtime.H"
19 #include "files.H"
20 
21 #include "bed.H"
22 
23 
24 
//  Convert a canu-style sequence name into its numeric tig ID.
//
//  A name is treated as canu-style when it begins with 'tig', 'ctg' or 'utg'
//  and that prefix is immediately followed by at least one decimal digit.
//  The run of digits is the ID (e.g. 'tig00000042' -> 42); any text after
//  the digits is ignored.
//
//  Returns UINT32_MAX when:
//    - name is NULL,
//    - name does not start with one of the three prefixes,
//    - no digit follows the prefix (so 'tigger' is NOT reported as tig 0),
//    - the number does not fit below UINT32_MAX (no silent truncation).
static
uint32
nameToCanuID(char *name) {
  if (name == NULL)
    return(UINT32_MAX);

  bool const  hasPrefix = ((strncmp(name, "tig", 3) == 0) ||
                           (strncmp(name, "ctg", 3) == 0) ||
                           (strncmp(name, "utg", 3) == 0));

  if (hasPrefix == false)
    return(UINT32_MAX);

  char const *digits = name + 3;

  //  Insist on a digit here: strtoull() on its own would skip whitespace,
  //  accept a sign, and return zero when nothing at all could be parsed,
  //  which is indistinguishable from a real ID of zero.
  if ((digits[0] < '0') || (digits[0] > '9'))
    return(UINT32_MAX);

  //  On overflow strtoull() returns ULLONG_MAX, which the range check
  //  below also rejects.
  unsigned long long const  value = strtoull(digits, NULL, 10);

  if (value >= UINT32_MAX)
    return(UINT32_MAX);

  return(static_cast<uint32>(value));
}
48 
49 
50 
bedRecord()51 bedRecord::bedRecord() {
52   _Aname    = NULL;
53   _Aid      = UINT32_MAX;
54 
55   _bgn      = UINT32_MAX;
56   _end      = 0;
57 
58   _Bname    = NULL;
59   _Bid      = UINT32_MAX;
60 
61   _score    = 0;
62   _Bfwd     = false;
63 }
64 
65 
bedRecord(char * inLine)66 bedRecord::bedRecord(char *inLine) {
67   load(inLine);
68 }
69 
70 
~bedRecord()71 bedRecord::~bedRecord() {
72   delete [] _Aname;
73   delete [] _Bname;
74 }
75 
76 
77 void
load(char * inLine)78 bedRecord::load(char *inLine) {
79   splitToWords W(inLine);
80 
81   _Aname    = new char [strlen(W[0]) + 1];
82   _Aid      = UINT32_MAX;
83 
84   _bgn      = W.toint32(1);
85   _end      = W.toint32(2);
86 
87   _Bname    = new char [strlen(W[3]) + 1];
88   _Bid      = UINT32_MAX;
89 
90   _score    = W.touint32(4);
91   _Bfwd     = W[5][0] == '+';
92 
93   strcpy(_Aname,    W[0]);
94   strcpy(_Bname,    W[3]);
95 
96   _Aid = nameToCanuID(_Aname);    //  Search for canu-specific names, and convert to tigID's.
97   _Bid = nameToCanuID(_Bname);
98 }
99 
100 
101 void
save(FILE * outFile)102 bedRecord::save(FILE *outFile) {
103   fprintf(outFile, "%s\t%d\t%d\t%s\t%u\t%c\n",
104           _Aname, _bgn, _end, _Bname, _score, (_Bfwd == true) ? '+' : '-');
105 }
106 
107 
108 
bedFile(char * inName)109 bedFile::bedFile(char *inName) {
110   loadFile(inName);
111 }
112 
113 
~bedFile()114 bedFile::~bedFile() {
115   for (uint32 ii=0; ii<_records.size(); ii++)
116     delete _records[ii];
117 }
118 
119 
120 bool
loadFile(char * inName)121 bedFile::loadFile(char *inName) {
122   char  *L    = NULL;
123   uint32 Llen = 0;
124   uint32 Lmax = 0;
125 
126   FILE *F = AS_UTL_openInputFile(inName);
127 
128   while (AS_UTL_readLine(L, Llen, Lmax, F)) {
129     _records.push_back(new bedRecord(L));
130   }
131 
132   AS_UTL_closeFile(F, inName);
133 
134   delete [] L;
135 
136   fprintf(stderr, "bed:  Loaded " F_SIZE_T " records.\n", _records.size());
137 
138   return(true);
139 }
140 
141 
142 
143 
144 bool
saveFile(char * outName)145 bedFile::saveFile(char *outName) {
146 
147   FILE *F = AS_UTL_openOutputFile(outName);
148 
149   for (uint32 ii=0; ii<_records.size(); ii++)
150     if (_records[ii])
151       _records[ii]->save(F);
152 
153   AS_UTL_closeFile(F, outName);
154 
155   return(true);
156 }
157 
158