1 #include "Parameters.h"
2 #include "ErrorWarning.h"
3 #include "streamFuns.h"
4 #include <fstream>
5 #include <sys/stat.h>
6 #include "serviceFuns.cpp"
7
readFilesInit()8 void Parameters::readFilesInit()
9 {//initialize read files - but do not open yet
10
11 if (readFilesType.at(0) == "Fastx") {
12 readFilesTypeN=1;
13 } else if (readFilesType.at(0) == "SAM"){
14 readFilesTypeN=10;
15 readFiles.samAttrKeepAll = false;
16 readFiles.samAttrKeepNone = false;
17 if (readFiles.samAttrKeepIn.at(0) == "All") {
18 readFiles.samAttrKeepAll = true;
19 } else if (readFiles.samAttrKeepIn.at(0) == "None") {
20 readFiles.samAttrKeepNone = true;
21 } else {
22 for (auto &tag: readFiles.samAttrKeepIn) {
23 if (tag.size()!=2) {
24 exitWithError("EXITING because of FATAL PARAMETER ERROR: each SAM tags in --readFilesSAMtagsKeep should contain two letters\n\
25 SOLUTION: specify only two-letter tags in --readFilesSAMtagsKeep.",
26 std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
27 };
28 //array<char,2> taga = {tag[0], tag[1]};
29 uint16_t tagn = * ( (uint16_t*) tag.c_str() );
30 readFiles.samAttrKeep.insert(tagn);
31 };
32 };
33 } else {
34 ostringstream errOut;
35 errOut <<"EXITING because of FATAL INPUT ERROR: unknown/unimplemented value for --readFilesType: "<<readFilesType.at(0) <<"\n";
36 errOut <<"SOLUTION: specify one of the allowed values: Fastx or SAM\n";
37 exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
38 };
39
40 readFilesPrefixFinal=(readFilesPrefix=="-" ? "" : readFilesPrefix);
41
42 if (readFilesManifest[0]=="-") {//no manifest, file names in readFilesIn
43 readFilesNames.resize(readFilesIn.size());
44
45 for (uint32 imate=0; imate<readFilesNames.size(); imate++) {
46 splitString(readFilesIn[imate], ',', readFilesNames[imate]);
47 if (readFilesNames[imate].back().empty()) {//extra comma at the end
48 readFilesNames[imate].pop_back();
49 };
50
51 if (imate>0 && readFilesNames[imate].size() != readFilesNames[imate-1].size() ) {
52 ostringstream errOut;
53 errOut <<"EXITING: because of fatal INPUT ERROR: number of input files for mate" << imate+1 <<"="<< readFilesNames[imate].size() <<" is not equal to that for mate"<< imate-1 <<"="<< readFilesNames[imate-1].size() <<"\n";
54 errOut <<"Make sure that the number of files in --readFilesIn is the same for both mates\n";
55 exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
56 };
57
58 for ( auto &fn : readFilesNames[imate] )
59 fn = readFilesPrefixFinal + fn; //add prefix
60 };
61
62 readFilesN = readFilesNames[0].size();
63
64 //read groups
65 if (outSAMattrRGline.at(0)!="-") {
66 string linefull;
67 for (uint ii=0;ii<outSAMattrRGline.size(); ii++) {//concatenate into one line
68 if (ii==0 || outSAMattrRGline.at(ii)==",") {//start new entry
69 if (ii>0) ++ii;//skip comma
70 outSAMattrRGlineSplit.push_back(outSAMattrRGline.at(ii)); //start new RG line with the first field which must be ID:xxx
71 if (outSAMattrRGlineSplit.back().substr(0,3)!="ID:") {
72 ostringstream errOut;
73 errOut <<"EXITING because of FATAL INPUT ERROR: the first word of a line from --outSAMattrRGline="<<outSAMattrRGlineSplit.back()<<" does not start with ID:xxx read group identifier\n";
74 errOut <<"SOLUTION: re-run STAR with all lines in --outSAMattrRGline starting with ID:xxx\n";
75 exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
76 };
77 outSAMattrRG.push_back(outSAMattrRGlineSplit.back().substr(3)); //this adds the ID field
78 } else {//keep adding fields to this RG line, until the next comma
79 outSAMattrRGlineSplit.back()+="\t" + outSAMattrRGline.at(ii);
80 };
81 };
82 };
83
84 if (outSAMattrRG.size()>1 && outSAMattrRG.size()!=readFilesN) {
85 ostringstream errOut;
86 errOut <<"EXITING: because of fatal INPUT ERROR: number of input read files: "<< readFilesN << " does not agree with number of read group RG entries: "<< outSAMattrRG.size() <<"\n";
87 errOut <<"Make sure that the number of RG lines in --outSAMattrRGline is equal to either 1, or the number of input read files in --readFilesIn\n";
88 exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
89 } else if (outSAMattrRG.size()==1) {//use the same read group for all files
90 for (uint32 ifile=1; ifile<readFilesN; ifile++) {
91 outSAMattrRG.push_back(outSAMattrRG.at(0));
92 };
93 };
94
95 } else {//read file names from manifest
96 //TODO check that outSAMattrRGline and readFilesIn are not set, throw an error
97
98 ifstream & rfM = ifstrOpen(readFilesManifest[0], ERROR_OUT, "SOLUTION: check the path and permissions for readFilesManifest = " + readFilesManifest[0], *this);
99 inOut->logMain << "Reading input file names and read groups from readFileManifest " << readFilesManifest[0] << endl;
100
101 readFilesNames.resize(2);
102 string rfMline;
103 while (getline(rfM, rfMline)) {
104 if (rfMline.find_first_not_of(" \t")>=rfMline.size())
105 continue; //skip blank lines
106
107 uint32 itab1=0, itab2=0;
108 for (uint32 imate=0; imate<2; imate++) {//SE manifest 2nd column contains "-"
109 itab2=rfMline.find('\t',itab1);
110 if (itab2>=rfMline.size()) {
111 ostringstream errOut;
112 errOut <<"EXITING because of FATAL INPUT FILE error: readFileManifest file " << readFilesManifest[0] << " has to contain at least 3 tab separated columns\n";
113 errOut <<"SOLUTION: fix the formatting of the readFileManifest file: Read1 <tab> Read2 <tab> ReadGroup. For single-end reads, use - in the 2nd column.\n";
114 exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_INPUT_FILES, *this);
115 };
116 readFilesNames[imate].push_back( readFilesPrefixFinal + rfMline.substr(itab1,itab2-itab1) );
117 itab1=itab2+1;
118
119 inOut->logMain << readFilesNames[imate].back() <<'\t';
120 };
121
122 outSAMattrRGlineSplit.push_back(rfMline.substr(itab2+1));
123
124 if (outSAMattrRGlineSplit.back().substr(0,3)!="ID:")
125 outSAMattrRGlineSplit.back().insert(0,"ID:");
126
127 itab2=outSAMattrRGlineSplit.back().find('\t');
128 outSAMattrRG.push_back(outSAMattrRGlineSplit.back().substr(3,itab2-3));
129
130 inOut->logMain << outSAMattrRGlineSplit.back() <<'\n';
131
132 };
133 rfM.close();
134
135 readNends = ( readFilesNames[1][0].back()=='-' ? 1 : 2);
136 readFilesNames.resize(readNends);//resize if readFilesN=1
137 readFilesN = readFilesNames[0].size();
138 };
139
140 inOut->logMain << "Number of fastq files for each mate = " << readFilesN << endl;
141
142 readFilesCommandString="";
143 if (readFilesCommand.at(0)=="-") {
144 if (readFilesN>1)
145 readFilesCommandString="cat ";//concatenate multiple files
146 } else {
147 for (uint ii=0; ii<readFilesCommand.size(); ii++)
148 readFilesCommandString+=readFilesCommand.at(ii)+" "; //concatenate into one string
149 };
150
151 if (readFilesTypeN==1) {
152 readNends=readFilesNames.size(); //for now the number of mates is defined by the number of input files
153 } else if (readFilesTypeN==10) {//find the number of mates from the SAM file
154 if (readFilesType.size()==2 && readFilesType.at(1)=="SE") {
155 readNends=1;
156 } else if (readFilesType.size()==2 && readFilesType.at(1)=="PE") {
157 readNends=2;
158 } else {
159 ostringstream errOut;
160 errOut <<"EXITING because of FATAL INPUT ERROR: --readFilesType SAM requires specifying SE or PE reads"<<"\n";
161 errOut <<"SOLUTION: specify --readFilesType SAM SE for single-end reads or --readFilesType SAM PE for paired-end reads\n";
162 exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
163 };
164 };
165
166 readNmates=readNends; //this may be changed later if one of the reads is barcode rea
167 };
168