1 /* This file is part of Jellyfish.
2
3 Jellyfish is free software: you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation, either version 3 of the License, or
6 (at your option) any later version.
7
8 Jellyfish is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with Jellyfish. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17 #include <stdlib.h>
18 #include <vector>
19 #include <iostream>
20 #include <fstream>
21
22 #include <jellyfish/err.hpp>
23 #include <jellyfish/misc.hpp>
24 #include <jellyfish/randomc.h>
25 #include <jellyfish/generate_sequence_cmdline.hpp>
26
27 namespace err = jellyfish::err;
28
29 class rDNAg_t {
30 public:
rDNAg_t(CRandomMersenne * _rng)31 rDNAg_t(CRandomMersenne *_rng) : rng(_rng), i(15), buff(0) {}
letter()32 char letter() {
33 i = (i+1) % 16;
34 if(i == 0)
35 buff = rng->BRandom();
36 char res = letters[buff & 0x3];
37 buff >>= 2;
38 return res;
39 }
qual_Illumina()40 char qual_Illumina() {
41 return rng->IRandom(66, 104);
42 }
43 private:
44 CRandomMersenne *rng;
45 int i;
46 uint32_t buff;
47 static const char letters[4];
48 };
49 const char rDNAg_t::letters[4] = { 'A', 'C', 'G', 'T' };
50
51 // Output matrix
52 // Generate matrices
53 // uint64_t lines[64];
54 // for(unsigned int j = 0; j < args.mer_arg.size(); j++) {
55 // if(args.mer_arg[j] <= 0 || args.mer_arg[j] > 31)
56 // die << "Mer size (" << args.mer_arg[j] << ") must be between 1 and 31.";
57 // int matrix_size = args.mer_arg[j] << 1;
58 // SquareBinaryMatrix mat(matrix_size), inv(matrix_size);
59 // while(true) {
60 // for(int i = 0; i < matrix_size; i++)
61 // lines[i] = (uint64_t)rng.BRandom() | ((uint64_t)rng.BRandom() << 32);
62 // mat = SquareBinaryMatrix(lines, matrix_size);
63 // try {
64 // inv = mat.inverse();
65 // break;
66 // } catch(SquareBinaryMatrix::SingularMatrix &e) {}
67 // }
68
69 // char path[4096];
70 // int len = snprintf(path, sizeof(path), "%s_matrix_%d", args.output_arg,
71 // args.mer_arg[j]);
72 // if(len < 0)
73 // die << "Error creating the matrix file '" << path << "'" << err::no;
74 // if((unsigned int)len >= sizeof(path))
75 // die << "Output prefix too long '" << args.output_arg << "'";
76 // std::ofstream fd(path);
77 // if(!fd.good())
78 // die << "Can't open matrix file '" << path << "'" << err::no;
79 // if(args.verbose_flag)
80 // std::cout << "Creating matrix file '" << path << "'\n";
81 // mat.dump(&fd);
82 // if(!fd.good())
83 // die << "Error while writing matrix '" << path << "'" << err::no;
84 // fd.close();
85 // }
86
87
create_path(char * path,unsigned int path_size,const char * ext,bool many,int i,const char * output_arg)88 void create_path(char *path, unsigned int path_size, const char *ext, bool many, int i, const char *output_arg) {
89 int len;
90 if(many)
91 len = snprintf(path, path_size, "%s_%d.%s", output_arg, i, ext);
92 else
93 len = snprintf(path, path_size, "%s.%s", output_arg, ext);
94 if(len < 0)
95 die(err::msg() << "Error creating the fasta file '" << path << "': " << err::no);
96 if((unsigned int)len >= path_size)
97 die(err::msg() << "Output prefix too long '" << output_arg << "'");
98 }
99
100 generate_sequence_args args;
101
output_fastq(size_t length,const char * path,CRandomMersenne & rng)102 void output_fastq(size_t length, const char* path, CRandomMersenne& rng) {
103 rDNAg_t rDNAg(&rng);
104 std::ofstream fd(path);
105 if(!fd.good())
106 die(err::msg() << "Can't open fasta file '" << path << "': " << jellyfish::err::no);
107 if(args.verbose_flag)
108 std::cout << "Creating fastq file '" << path << "'\n";
109
110 size_t total_len = 0;
111 unsigned long read_id = 0;
112 while(total_len < length) {
113 fd << "@read_" << (read_id++) << "\n";
114 int base;
115 for(base = 0; base < 70 && total_len < length; base++, total_len++)
116 fd << rDNAg.letter();
117 fd << "\n+\n";
118 for(int j = 0; j < base; j++)
119 fd << rDNAg.qual_Illumina();
120 fd << "\n";
121 }
122 if(!fd.good())
123 die(err::msg() << "Error while writing fasta file '" << path << "': " << jellyfish::err::no);
124 fd.close();
125 }
126
output_fasta(size_t length,const char * path,CRandomMersenne & rng)127 void output_fasta(size_t length, const char* path, CRandomMersenne& rng) {
128 rDNAg_t rDNAg(&rng);
129 std::ofstream fd(path);
130 if(!fd.good())
131 die(err::msg() << "Can't open fasta file '" << path << "': " << jellyfish::err::no);
132 if(args.verbose_flag)
133 std::cout << "Creating fasta file '" << path << "'\n";
134
135 size_t read_length = args.read_length_given ? args.read_length_arg : length;
136 size_t total_len = 0;
137 size_t read = 0;
138 long rid = 0;
139 fd << ">read" << ++rid << "\n";
140 while(total_len < length) {
141 for(int base = 0; base < 70 && total_len < length && read < read_length;
142 base++) {
143 fd << rDNAg.letter();
144 total_len++;
145 read++;
146 }
147 fd << "\n";
148 if(read >= read_length) {
149 fd << ">read" << ++rid << "\n";
150 read = 0;
151 }
152 }
153 if(!fd.good())
154 die(err::msg() << "Error while writing fasta file '" << path << "': " << jellyfish::err::no);
155 fd.close();
156 }
157
main(int argc,char * argv[])158 int main(int argc, char *argv[])
159 {
160 args.parse(argc, argv);
161
162 if(args.verbose_flag)
163 std::cout << "Seed: " << args.seed_arg << "\n";
164 CRandomMersenne rng(args.seed_arg);
165
166
167 // Output sequence
168 char path[4096];
169 bool many = args.length_arg.size() > 1;
170
171 for(unsigned int i = 0; i < args.length_arg.size(); ++i) {
172 if(args.fastq_flag) {
173 create_path(path, sizeof(path), "fq", many, i, args.output_arg);
174 output_fastq(args.length_arg[i], path, rng);
175 } else {
176 create_path(path, sizeof(path), "fa", many, i, args.output_arg);
177 output_fasta(args.length_arg[i], path, rng);
178 }
179 }
180
181 return 0;
182 }
183