1 // ==========================================================================
2 // Mason - A Read Simulator
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34
35 #include "external_split_merge.h"
36
37 // ---------------------------------------------------------------------------
38 // Function IdSplitter::open()
39 // ---------------------------------------------------------------------------
40
open()41 void IdSplitter::open()
42 {
43 close();
44 for (unsigned i = 0; i < numContigs; ++i)
45 {
46 #if defined(STDLIB_VS)
47 char fileNameBuffer[1000];
48 char filePathBuffer[1000];
49 // Gets the temp path env string (no guarantee it's a valid path).
50 DWORD dwRetVal = 0;
51 dwRetVal = GetTempPath(1000, // length of the buffer
52 filePathBuffer); // buffer for path
53 if (dwRetVal > 1000 || (dwRetVal == 0))
54 {
55 std::cerr << "GetTempPath failed" << std::endl;
56 exit(1);
57 }
58
59 UINT uRetVal = 0;
60 uRetVal = GetTempFileName(filePathBuffer, // directory for tmp files
61 TEXT("MASON_"), // temp file name prefix
62 0, // create unique name
63 fileNameBuffer); // buffer for name
64
65 if (uRetVal == 0)
66 {
67 std::cerr << "GetTempFileName failed" << std::endl;
68 exit(1);
69 }
70
71 // The file exists now and is already closed.
72 DeleteFile(fileNameBuffer);
73 files.push_back(new std::fstream(&fileNameBuffer[0], std::ios::binary | std::ios::in | std::ios::out));
74 // TODO: Deleting the file here will not actually remove it on windows.
75 fileNames.push_back(fileNameBuffer);
76 #else // POSIX (Linux/Mac)
77 // Create temporary file using POSIX API, open with <cstdio>, delete, close POSIX.
78 char const * tmpdir = 0;
79 if ((tmpdir = getenv("TMPDIR")) == NULL)
80 tmpdir = "/tmp";
81 std::string pathTpl = tmpdir;
82 pathTpl += "/MASON_XXXXXX";
83
84 mode_t cur_umask = umask(S_IRWXO | S_IRWXG); // to silence Coverity warning
85 int fd = mkstemp(&pathTpl[0]);
86 files.push_back(new std::fstream(pathTpl.c_str(), std::ios::binary | std::ios::in | std::ios::out));
87 umask(cur_umask);
88 remove(pathTpl.c_str());
89 ::close(fd);
90 #endif
91
92 if (!files.back())
93 {
94 std::cerr << "ERROR: Could not open temporary file!\n";
95 exit(1);
96 }
97 }
98 }
99
100 // ---------------------------------------------------------------------------
101 // Function IdSplitter::reset()
102 // ---------------------------------------------------------------------------
103
reset()104 void IdSplitter::reset()
105 {
106 for (unsigned i = 0; i < files.size(); ++i)
107 if (files[i] != 0)
108 {
109 SEQAN_ASSERT(files[i]->good());
110 files[i]->flush();
111 files[i]->seekg(0);
112 SEQAN_ASSERT(files[i]->good());
113 }
114 }
115
116 // ---------------------------------------------------------------------------
117 // Function IdSplitter::close()
118 // ---------------------------------------------------------------------------
119
close()120 void IdSplitter::close()
121 {
122 for (unsigned i = 0; i < files.size(); ++i)
123 if (files[i])
124 {
125 delete files[i];
126 #ifdef STDLIB_VS
127 DeleteFile(fileNames[i].c_str());
128 #endif // #ifdef STDLIB_VS
129 files[i] = 0;
130 }
131 files.clear();
132 fileNames.clear();
133 }
134
135 // ---------------------------------------------------------------------------
136 // Function SamJoiner::init()
137 // ---------------------------------------------------------------------------
138
init(seqan::BamFileOut * outPtr)139 void SamJoiner::init(seqan::BamFileOut * outPtr)
140 {
141 resize(records, splitter->files.size());
142 active.resize(splitter->files.size());
143
144 for (unsigned i = 0; i < splitter->files.size(); ++i)
145 {
146 if (i == 0u && outPtr)
147 bamFileIns.push_back(new seqan::BamFileIn(*outPtr, *splitter->files[i]));
148 else
149 bamFileIns.push_back(new seqan::BamFileIn(*splitter->files[i]));
150
151 // We use a separate header structure and name stores and caches. Since the headers of all files are equal, we
152 // will write out the first one only.
153 seqan::BamHeader tmpHeader;
154 readHeader(tmpHeader, *bamFileIns[i]);
155 if (i == 0u)
156 header = tmpHeader;
157
158 active[i] = _loadNext(records[i], i);
159 numActive += (active[i] != false);
160 }
161 }
162
163 // ---------------------------------------------------------------------------
164 // Function SamJoiner::_loadNext()
165 // ---------------------------------------------------------------------------
166
_loadNext(seqan::BamAlignmentRecord & record,unsigned idx)167 bool SamJoiner::_loadNext(seqan::BamAlignmentRecord & record, unsigned idx)
168 {
169 if (seqan::atEnd(*bamFileIns[idx]))
170 return false;
171 readRecord(record, *bamFileIns[idx]);
172 return true;
173 }
174
175 // ---------------------------------------------------------------------------
176 // Function SamJoiner::get()
177 // ---------------------------------------------------------------------------
178
get(seqan::BamAlignmentRecord & record)179 int SamJoiner::get(seqan::BamAlignmentRecord & record)
180 {
181 unsigned idx = std::numeric_limits<unsigned>::max();
182 for (unsigned i = 0; i < length(records); ++i)
183 {
184 if (!active[i])
185 continue;
186 if (idx == std::numeric_limits<unsigned>::max() || ltBamAlignmentRecord(records[i], records[idx]))
187 idx = i;
188 }
189 if (idx == std::numeric_limits<unsigned>::max())
190 return 1;
191
192 // We use double-buffering and the input parameters as buffers.
193 using std::swap;
194 active[idx] = _loadNext(record, idx);
195 swap(record, records[idx]);
196 numActive -= !active[idx];
197
198 return 0;
199 }
200
201 // ---------------------------------------------------------------------------
202 // Function ContigPicker::pick()
203 // ---------------------------------------------------------------------------
204
pick()205 std::pair<int, int> ContigPicker::pick()
206 {
207 // Pick reference id.
208 int rID = 0;
209 if (lengthSums.size() > 1u)
210 {
211 std::uniform_int_distribution<int64_t> dist(0, lengthSums.back() - 1);
212 int64_t x = dist(rng);
213 for (unsigned i = 0; i < lengthSums.size(); ++i)
214 {
215 if (x >= lengthSums[i])
216 rID = i + 1;
217 if (x < lengthSums[i])
218 break;
219 }
220 }
221
222 // Pick haplotype id.
223 std::uniform_int_distribution<int> dist2(0, numHaplotypes - 1);
224 int hID = dist2(rng);
225
226 return std::make_pair(rID, hID);
227 }
228