1 /*  This file is part of Jellyfish.
2 
3     Jellyfish is free software: you can redistribute it and/or modify
4     it under the terms of the GNU General Public License as published by
5     the Free Software Foundation, either version 3 of the License, or
6     (at your option) any later version.
7 
8     Jellyfish is distributed in the hope that it will be useful,
9     but WITHOUT ANY WARRANTY; without even the implied warranty of
10     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11     GNU General Public License for more details.
12 
13     You should have received a copy of the GNU General Public License
14     along with Jellyfish.  If not, see <http://www.gnu.org/licenses/>.
15 */
16 
17 #include <stdlib.h>
18 #include <vector>
19 #include <iostream>
20 #include <fstream>
21 
22 #include <jellyfish/err.hpp>
23 #include <jellyfish/misc.hpp>
24 #include <jellyfish/randomc.h>
25 #include <jellyfish/generate_sequence_cmdline.hpp>
26 
27 namespace err = jellyfish::err;
28 
29 class rDNAg_t {
30 public:
rDNAg_t(CRandomMersenne * _rng)31   rDNAg_t(CRandomMersenne *_rng) : rng(_rng), i(15), buff(0) {}
letter()32   char letter() {
33     i = (i+1) % 16;
34     if(i == 0)
35       buff = rng->BRandom();
36     char res = letters[buff & 0x3];
37     buff >>= 2;
38     return res;
39   }
qual_Illumina()40   char qual_Illumina() {
41     return rng->IRandom(66, 104);
42   }
43 private:
44   CRandomMersenne *rng;
45   int i;
46   uint32_t buff;
47   static const char letters[4];
48 };
49 const char rDNAg_t::letters[4] = { 'A', 'C', 'G', 'T' };
50 
51 // Output matrix
52   // Generate matrices
53   // uint64_t lines[64];
54   // for(unsigned int j = 0; j < args.mer_arg.size(); j++) {
55   //   if(args.mer_arg[j] <= 0 || args.mer_arg[j] > 31)
56   //     die << "Mer size (" << args.mer_arg[j] << ") must be between 1 and 31.";
57   //   int matrix_size = args.mer_arg[j] << 1;
58   //   SquareBinaryMatrix mat(matrix_size), inv(matrix_size);
59   //   while(true) {
60   //     for(int i = 0; i < matrix_size; i++)
61   //       lines[i] = (uint64_t)rng.BRandom() | ((uint64_t)rng.BRandom() << 32);
62   //     mat = SquareBinaryMatrix(lines, matrix_size);
63   //     try {
64   //       inv = mat.inverse();
65   //       break;
66   //     } catch(SquareBinaryMatrix::SingularMatrix &e) {}
67   //   }
68 
69   //   char path[4096];
70   //   int len = snprintf(path, sizeof(path), "%s_matrix_%d", args.output_arg,
71   //                      args.mer_arg[j]);
72   //   if(len < 0)
73   //     die << "Error creating the matrix file '" << path << "'" << err::no;
74   //   if((unsigned int)len >= sizeof(path))
75   //     die << "Output prefix too long '" << args.output_arg << "'";
76   //   std::ofstream fd(path);
77   //   if(!fd.good())
78   //     die << "Can't open matrix file '" << path << "'" << err::no;
79   //   if(args.verbose_flag)
80   //     std::cout << "Creating matrix file '" << path << "'\n";
81   //   mat.dump(&fd);
82   //   if(!fd.good())
83   //     die << "Error while writing matrix '" << path << "'" << err::no;
84   //   fd.close();
85   // }
86 
87 
create_path(char * path,unsigned int path_size,const char * ext,bool many,int i,const char * output_arg)88 void create_path(char *path, unsigned int path_size, const char *ext, bool many, int i, const char *output_arg) {
89   int len;
90   if(many)
91     len = snprintf(path, path_size, "%s_%d.%s", output_arg, i, ext);
92   else
93     len = snprintf(path, path_size, "%s.%s", output_arg, ext);
94   if(len < 0)
95     die(err::msg() << "Error creating the fasta file '" << path << "': " << err::no);
96   if((unsigned int)len >= path_size)
97     die(err::msg() << "Output prefix too long '" << output_arg << "'");
98 }
99 
100 generate_sequence_args args;
101 
output_fastq(size_t length,const char * path,CRandomMersenne & rng)102 void output_fastq(size_t length, const char* path, CRandomMersenne& rng) {
103   rDNAg_t rDNAg(&rng);
104   std::ofstream fd(path);
105   if(!fd.good())
106     die(err::msg() << "Can't open fasta file '" << path << "': " << jellyfish::err::no);
107   if(args.verbose_flag)
108     std::cout << "Creating fastq file '" << path << "'\n";
109 
110   size_t        total_len = 0;
111   unsigned long read_id   = 0;
112   while(total_len < length) {
113     fd << "@read_" << (read_id++) << "\n";
114     int base;
115     for(base = 0; base < 70 && total_len < length; base++, total_len++)
116       fd << rDNAg.letter();
117     fd << "\n+\n";
118     for(int j = 0; j < base; j++)
119       fd << rDNAg.qual_Illumina();
120     fd << "\n";
121   }
122   if(!fd.good())
123     die(err::msg() << "Error while writing fasta file '" << path << "': " << jellyfish::err::no);
124   fd.close();
125 }
126 
output_fasta(size_t length,const char * path,CRandomMersenne & rng)127 void output_fasta(size_t length, const char* path, CRandomMersenne& rng) {
128   rDNAg_t rDNAg(&rng);
129   std::ofstream fd(path);
130   if(!fd.good())
131     die(err::msg() << "Can't open fasta file '" << path << "': " << jellyfish::err::no);
132   if(args.verbose_flag)
133     std::cout << "Creating fasta file '" << path << "'\n";
134 
135   size_t read_length = args.read_length_given ? args.read_length_arg : length;
136   size_t total_len   = 0;
137   size_t read        = 0;
138   long   rid         = 0;
139   fd << ">read" << ++rid << "\n";
140   while(total_len < length) {
141     for(int base = 0; base < 70 && total_len < length && read < read_length;
142         base++) {
143       fd << rDNAg.letter();
144       total_len++;
145       read++;
146     }
147     fd << "\n";
148     if(read >= read_length) {
149       fd << ">read" << ++rid << "\n";
150       read = 0;
151     }
152   }
153   if(!fd.good())
154     die(err::msg() << "Error while writing fasta file '" << path << "': " << jellyfish::err::no);
155   fd.close();
156 }
157 
main(int argc,char * argv[])158 int main(int argc, char *argv[])
159 {
160   args.parse(argc, argv);
161 
162   if(args.verbose_flag)
163     std::cout << "Seed: " << args.seed_arg << "\n";
164   CRandomMersenne rng(args.seed_arg);
165 
166 
167   // Output sequence
168   char path[4096];
169   bool many = args.length_arg.size() > 1;
170 
171   for(unsigned int i = 0; i < args.length_arg.size(); ++i) {
172     if(args.fastq_flag) {
173       create_path(path, sizeof(path), "fq", many, i, args.output_arg);
174       output_fastq(args.length_arg[i], path, rng);
175     } else {
176       create_path(path, sizeof(path), "fa", many, i, args.output_arg);
177       output_fasta(args.length_arg[i], path, rng);
178     }
179   }
180 
181   return 0;
182 }
183