1 #include "Parameters.h"
2 #include "ErrorWarning.h"
3 #include "streamFuns.h"
4 #include <fstream>
5 #include <sys/stat.h>
6 #include "serviceFuns.cpp"
7 
readFilesInit()8 void Parameters::readFilesInit()
9 {//initialize read files - but do not open yet
10 
11     if (readFilesType.at(0) == "Fastx") {
12         readFilesTypeN=1;
13     } else if (readFilesType.at(0) == "SAM"){
14         readFilesTypeN=10;
15         readFiles.samAttrKeepAll = false;
16         readFiles.samAttrKeepNone = false;
17         if (readFiles.samAttrKeepIn.at(0) == "All") {
18             readFiles.samAttrKeepAll = true;
19         } else if (readFiles.samAttrKeepIn.at(0) == "None") {
20             readFiles.samAttrKeepNone = true;
21         } else {
22             for (auto &tag: readFiles.samAttrKeepIn) {
23                 if (tag.size()!=2) {
24                     exitWithError("EXITING because of FATAL PARAMETER ERROR: each SAM tags in --readFilesSAMtagsKeep should contain two letters\n\
25                                   SOLUTION: specify only two-letter tags in --readFilesSAMtagsKeep.",
26                                   std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
27                 };
28                 //array<char,2> taga = {tag[0], tag[1]};
29                 uint16_t tagn = * ( (uint16_t*) tag.c_str() );
30                 readFiles.samAttrKeep.insert(tagn);
31             };
32         };
33     } else {
34         ostringstream errOut;
35         errOut <<"EXITING because of FATAL INPUT ERROR: unknown/unimplemented value for --readFilesType: "<<readFilesType.at(0) <<"\n";
36         errOut <<"SOLUTION: specify one of the allowed values: Fastx or SAM\n";
37         exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
38     };
39 
40     readFilesPrefixFinal=(readFilesPrefix=="-" ? "" : readFilesPrefix);
41 
42     if (readFilesManifest[0]=="-") {//no manifest, file names in readFilesIn
43         readFilesNames.resize(readFilesIn.size());
44 
45         for (uint32 imate=0; imate<readFilesNames.size(); imate++) {
46             splitString(readFilesIn[imate], ',', readFilesNames[imate]);
47             if (readFilesNames[imate].back().empty()) {//extra comma at the end
48                 readFilesNames[imate].pop_back();
49             };
50 
51             if (imate>0 && readFilesNames[imate].size() != readFilesNames[imate-1].size() ) {
52                 ostringstream errOut;
53                 errOut <<"EXITING: because of fatal INPUT ERROR: number of input files for mate" << imate+1 <<"="<< readFilesNames[imate].size()  <<" is not equal to that for mate"<< imate-1 <<"="<< readFilesNames[imate-1].size() <<"\n";
54                 errOut <<"Make sure that the number of files in --readFilesIn is the same for both mates\n";
55                 exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
56             };
57 
58             for ( auto &fn : readFilesNames[imate] )
59                 fn = readFilesPrefixFinal + fn; //add prefix
60         };
61 
62         readFilesN = readFilesNames[0].size();
63 
64         //read groups
65         if (outSAMattrRGline.at(0)!="-") {
66             string linefull;
67             for (uint ii=0;ii<outSAMattrRGline.size(); ii++) {//concatenate into one line
68                 if (ii==0 || outSAMattrRGline.at(ii)==",") {//start new entry
69                     if (ii>0) ++ii;//skip comma
70                     outSAMattrRGlineSplit.push_back(outSAMattrRGline.at(ii)); //start new RG line with the first field which must be ID:xxx
71                     if (outSAMattrRGlineSplit.back().substr(0,3)!="ID:") {
72                         ostringstream errOut;
73                         errOut <<"EXITING because of FATAL INPUT ERROR: the first word of a line from --outSAMattrRGline="<<outSAMattrRGlineSplit.back()<<" does not start with ID:xxx read group identifier\n";
74                         errOut <<"SOLUTION: re-run STAR with all lines in --outSAMattrRGline starting with ID:xxx\n";
75                         exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
76                     };
77                     outSAMattrRG.push_back(outSAMattrRGlineSplit.back().substr(3)); //this adds the ID field
78                 } else {//keep adding fields to this RG line, until the next comma
79                     outSAMattrRGlineSplit.back()+="\t" + outSAMattrRGline.at(ii);
80                 };
81             };
82         };
83 
84         if (outSAMattrRG.size()>1 && outSAMattrRG.size()!=readFilesN) {
85             ostringstream errOut;
86             errOut <<"EXITING: because of fatal INPUT ERROR: number of input read files: "<< readFilesN << " does not agree with number of read group RG entries: "<< outSAMattrRG.size() <<"\n";
87             errOut <<"Make sure that the number of RG lines in --outSAMattrRGline is equal to either 1, or the number of input read files in --readFilesIn\n";
88             exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
89         } else if (outSAMattrRG.size()==1) {//use the same read group for all files
90             for (uint32 ifile=1; ifile<readFilesN; ifile++) {
91                 outSAMattrRG.push_back(outSAMattrRG.at(0));
92             };
93         };
94 
95     } else {//read file names from manifest
96         //TODO check that outSAMattrRGline and readFilesIn are not set, throw an error
97 
98         ifstream & rfM = ifstrOpen(readFilesManifest[0], ERROR_OUT, "SOLUTION: check the path and permissions for readFilesManifest = " + readFilesManifest[0], *this);
99         inOut->logMain << "Reading input file names and read groups from readFileManifest " << readFilesManifest[0] << endl;
100 
101         readFilesNames.resize(2);
102         string rfMline;
103         while (getline(rfM, rfMline)) {
104         	if (rfMline.find_first_not_of(" \t")>=rfMline.size())
105         		continue; //skip blank lines
106 
107             uint32 itab1=0, itab2=0;
108             for (uint32 imate=0; imate<2; imate++) {//SE manifest 2nd column contains "-"
109                 itab2=rfMline.find('\t',itab1);
110                 if (itab2>=rfMline.size()) {
111                     ostringstream errOut;
112                     errOut <<"EXITING because of FATAL INPUT FILE error: readFileManifest file " << readFilesManifest[0] <<  " has to contain at least 3 tab separated columns\n";
113                     errOut <<"SOLUTION: fix the formatting of the readFileManifest file: Read1 <tab> Read2 <tab> ReadGroup. For single-end reads, use - in the 2nd column.\n";
114                     exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_INPUT_FILES, *this);
115                 };
116                 readFilesNames[imate].push_back( readFilesPrefixFinal + rfMline.substr(itab1,itab2-itab1) );
117                 itab1=itab2+1;
118 
119                 inOut->logMain << readFilesNames[imate].back() <<'\t';
120             };
121 
122             outSAMattrRGlineSplit.push_back(rfMline.substr(itab2+1));
123 
124             if (outSAMattrRGlineSplit.back().substr(0,3)!="ID:")
125                 outSAMattrRGlineSplit.back().insert(0,"ID:");
126 
127             itab2=outSAMattrRGlineSplit.back().find('\t');
128             outSAMattrRG.push_back(outSAMattrRGlineSplit.back().substr(3,itab2-3));
129 
130             inOut->logMain <<  outSAMattrRGlineSplit.back() <<'\n';
131 
132         };
133         rfM.close();
134 
135         readNends = ( readFilesNames[1][0].back()=='-' ? 1 : 2);
136         readFilesNames.resize(readNends);//resize if readFilesN=1
137         readFilesN = readFilesNames[0].size();
138     };
139 
140     inOut->logMain << "Number of fastq files for each mate = " << readFilesN << endl;
141 
142     readFilesCommandString="";
143     if (readFilesCommand.at(0)=="-") {
144         if (readFilesN>1)
145             readFilesCommandString="cat   ";//concatenate multiple files
146     } else {
147         for (uint ii=0; ii<readFilesCommand.size(); ii++)
148             readFilesCommandString+=readFilesCommand.at(ii)+"   "; //concatenate into one string
149     };
150 
151     if (readFilesTypeN==1) {
152         readNends=readFilesNames.size(); //for now the number of mates is defined by the number of input files
153     } else if (readFilesTypeN==10) {//find the number of mates from the SAM file
154         if (readFilesType.size()==2 && readFilesType.at(1)=="SE") {
155             readNends=1;
156         } else if (readFilesType.size()==2 && readFilesType.at(1)=="PE") {
157             readNends=2;
158         } else {
159             ostringstream errOut;
160             errOut <<"EXITING because of FATAL INPUT ERROR: --readFilesType SAM requires specifying SE or PE reads"<<"\n";
161             errOut <<"SOLUTION: specify --readFilesType SAM SE for single-end reads or --readFilesType SAM PE for paired-end reads\n";
162             exitWithError(errOut.str(), std::cerr, inOut->logMain, EXIT_CODE_PARAMETER, *this);
163         };
164     };
165 
166     readNmates=readNends; //this may be changed later if one of the reads is barcode rea
167 };
168