1 /*
2  * Copyright (C) 2019, Siemens AG
3  * Author: Gaurav Mishra <mishra.gaurav@siemens.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * version 2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with this program; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17  */
18 
19 #include "FossologyUnicodeClean.hpp"
20 
21 using namespace std;
22 
23 /**
24  * Destructor to flush the streams and close any open files.
25  */
~FossologyUnicodeClean()26 FossologyUnicodeClean::~FossologyUnicodeClean()
27 {
28   this->flush();
29   if (this->destinationFile.is_open())
30   {
31     this->destinationFile.close();
32   }
33   if (this->sourceFile.is_open())
34   {
35     this->sourceFile.close();
36   }
37 }
38 
39 /**
40  * Constructor to open the input and output files (if passed).
41  * Also reserve the buffer in internal vector
42  * @param source      Source file path (STDIN if empty)
43  * @param destination Destination file path (STDOUT if empty)
44  */
FossologyUnicodeClean(string & source,string & destination)45 FossologyUnicodeClean::FossologyUnicodeClean(string &source,
46   string &destination) : sourceFile(NULL), destinationFile(NULL),
47       bufferSize (0), stopRead(false)
48 {
49   if ((!source.empty() && !destination.empty()) && (source == destination))
50   {
51     cerr << "Input and Output files can not be same.\n";
52     cerr << "Input: " << source << "\nOutput: " << destination;
53     cerr << " passed" << endl;
54     exit(-3);
55   }
56   if (!source.empty())
57   {
58     sourceFile.open(source, ios::in | ios::binary);
59     if (sourceFile.fail())
60     {
61       cerr << "Unable to open " << source << endl;
62       cerr << "Error: " << strerror(errno) << endl;
63       exit(-1);
64     }
65   }
66   if (!destination.empty())
67   {
68     destinationFile.open(destination, ios::out | ios::binary | ios::trunc);
69     if (destinationFile.fail())
70     {
71       cerr << "Unable to open " << destination << endl;
72       cerr << "Error: " << strerror(errno) << endl;
73       exit(-2);
74     }
75   }
76   this->buffer.reserve(MAX_BUFFER_LEN);
77 }
78 
79 /**
80  * Start the process to read from file/stream -> remove invalid chars -> print
81  * to file/stream.
82  */
startConvert()83 void FossologyUnicodeClean::startConvert()
84 {
85   string input;
86   input = this->dirtyRead();
87   while (!this->stopRead)
88   {
89     icu::UnicodeString output = fo::recodeToUnicode(input);
90     this->write(output);
91     input = this->dirtyRead();
92   }
93   this->flush();
94 }
95 
96 /**
97  * Read raw input from file or STDIN
98  * @return Raw string with MAX_LINE_READ characters.
99  */
dirtyRead()100 const string FossologyUnicodeClean::dirtyRead()
101 {
102   string input;
103   if (sourceFile.eof() || cin.eof())
104   {
105     this->stopRead = true;
106     return "";
107   }
108   if (sourceFile && sourceFile.is_open())
109   {
110     std::getline(sourceFile, input, '\n');
111   }
112   else
113   {
114     std::getline(cin, input, '\n');
115   }
116   return input;
117 }
118 
119 /**
120  * @brief Write the string to file/stream.
121  *
122  * * If the buffer is not filled, append to the buffer vector.
123  * * If the buffer is filled, call flush.
124  * @param output
125  */
write(const icu::UnicodeString & output)126 void FossologyUnicodeClean::write(const icu::UnicodeString &output)
127 {
128   this->buffer.push_back(output);
129   this->bufferSize++;
130   if (this->bufferSize == MAX_BUFFER_LEN)
131   {
132     this->flush();
133   }
134 }
135 
136 /**
137  * @brief Flush the buffers and reset the internal buffer
138  *
139  * Print the content of internal buffer to appropriate streams and flush them.
140  * Then clear the internal buffer and reset the size.
141  */
flush()142 void FossologyUnicodeClean::flush()
143 {
144   if (destinationFile && destinationFile.is_open())
145   {
146     for (size_t i = 0; i < this->buffer.size(); i++)
147     {
148       string temp;
149       buffer[i].toUTF8String(temp);
150       destinationFile << temp << "\n";
151     }
152   }
153   else
154   {
155     for (size_t i = 0; i < this->buffer.size(); i++)
156     {
157       string temp;
158       buffer[i].toUTF8String(temp);
159       cout << temp << "\n";
160     }
161   }
162   buffer.clear();
163   bufferSize = 0;
164 }
165 
166 /**
167  * Parse the CLI options for the program.
168  * @param argc        From main()
169  * @param argv        From main()
170  * @param[out] input  Input file path string (empty if not sent)
171  * @param[out] output Output file path string (empty if not sent)
172  * @return True if options parsed successfully, false otherwise
173  */
parseCliOptions(int argc,char ** argv,string & input,string & output)174 bool parseCliOptions(int argc, char **argv, string &input, string &output)
175 {
176   boost::program_options::options_description desc("fo_unicode_clean "
177     ": recognized options");
178   desc.add_options()
179   (
180     "help,h", "shows help"
181   )
182   (
183     "input,i",
184     boost::program_options::value<string>(),
185     "file to read"
186   )
187   (
188     "output,o",
189     boost::program_options::value<string>(),
190     "output file"
191   )
192   ;
193 
194   boost::program_options::variables_map vm;
195 
196   try
197   {
198     boost::program_options::store(
199       boost::program_options::command_line_parser(argc,
200         argv).options(desc).run(), vm);
201 
202     if (vm.count("help") > 0)
203     {
204       cout << desc << endl;
205       cout << "If no input passed, read from STDIN." << endl;
206       cout << "If no output passed, print to STDOUT." << endl;
207       exit(0);
208     }
209 
210     if (vm.count("input"))
211     {
212       input = vm["input"].as<string>();
213     }
214     if (vm.count("output"))
215     {
216       output = vm["output"].as<string>();
217     }
218     return true;
219   }
220   catch (boost::bad_any_cast&)
221   {
222     cout << "wrong parameter type" << endl;
223     cout << desc << endl;
224     return false;
225   }
226   catch (boost::program_options::error&)
227   {
228     cout << "wrong command line arguments" << endl;
229     cout << desc << endl;
230     return false;
231   }
232 }
233 
main(int argc,char ** argv)234 int main(int argc, char **argv)
235 {
236   string input, output;
237   if (parseCliOptions(argc, argv, input, output))
238   {
239     FossologyUnicodeClean obj(input, output);
240     obj.startConvert();
241     return 0;
242   }
243   return -4;
244 }
245