1 /*
2 * Copyright (C) 2019, Siemens AG
3 * Author: Gaurav Mishra <mishra.gaurav@siemens.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19 #include "FossologyUnicodeClean.hpp"
20
21 using namespace std;
22
23 /**
24 * Destructor to flush the streams and close any open files.
25 */
~FossologyUnicodeClean()26 FossologyUnicodeClean::~FossologyUnicodeClean()
27 {
28 this->flush();
29 if (this->destinationFile.is_open())
30 {
31 this->destinationFile.close();
32 }
33 if (this->sourceFile.is_open())
34 {
35 this->sourceFile.close();
36 }
37 }
38
39 /**
40 * Constructor to open the input and output files (if passed).
41 * Also reserve the buffer in internal vector
42 * @param source Source file path (STDIN if empty)
43 * @param destination Destination file path (STDOUT if empty)
44 */
FossologyUnicodeClean(string & source,string & destination)45 FossologyUnicodeClean::FossologyUnicodeClean(string &source,
46 string &destination) : sourceFile(NULL), destinationFile(NULL),
47 bufferSize (0), stopRead(false)
48 {
49 if ((!source.empty() && !destination.empty()) && (source == destination))
50 {
51 cerr << "Input and Output files can not be same.\n";
52 cerr << "Input: " << source << "\nOutput: " << destination;
53 cerr << " passed" << endl;
54 exit(-3);
55 }
56 if (!source.empty())
57 {
58 sourceFile.open(source, ios::in | ios::binary);
59 if (sourceFile.fail())
60 {
61 cerr << "Unable to open " << source << endl;
62 cerr << "Error: " << strerror(errno) << endl;
63 exit(-1);
64 }
65 }
66 if (!destination.empty())
67 {
68 destinationFile.open(destination, ios::out | ios::binary | ios::trunc);
69 if (destinationFile.fail())
70 {
71 cerr << "Unable to open " << destination << endl;
72 cerr << "Error: " << strerror(errno) << endl;
73 exit(-2);
74 }
75 }
76 this->buffer.reserve(MAX_BUFFER_LEN);
77 }
78
79 /**
80 * Start the process to read from file/stream -> remove invalid chars -> print
81 * to file/stream.
82 */
startConvert()83 void FossologyUnicodeClean::startConvert()
84 {
85 string input;
86 input = this->dirtyRead();
87 while (!this->stopRead)
88 {
89 icu::UnicodeString output = fo::recodeToUnicode(input);
90 this->write(output);
91 input = this->dirtyRead();
92 }
93 this->flush();
94 }
95
96 /**
97 * Read raw input from file or STDIN
98 * @return Raw string with MAX_LINE_READ characters.
99 */
dirtyRead()100 const string FossologyUnicodeClean::dirtyRead()
101 {
102 string input;
103 if (sourceFile.eof() || cin.eof())
104 {
105 this->stopRead = true;
106 return "";
107 }
108 if (sourceFile && sourceFile.is_open())
109 {
110 std::getline(sourceFile, input, '\n');
111 }
112 else
113 {
114 std::getline(cin, input, '\n');
115 }
116 return input;
117 }
118
119 /**
120 * @brief Write the string to file/stream.
121 *
122 * * If the buffer is not filled, append to the buffer vector.
123 * * If the buffer is filled, call flush.
124 * @param output
125 */
write(const icu::UnicodeString & output)126 void FossologyUnicodeClean::write(const icu::UnicodeString &output)
127 {
128 this->buffer.push_back(output);
129 this->bufferSize++;
130 if (this->bufferSize == MAX_BUFFER_LEN)
131 {
132 this->flush();
133 }
134 }
135
136 /**
137 * @brief Flush the buffers and reset the internal buffer
138 *
139 * Print the content of internal buffer to appropriate streams and flush them.
140 * Then clear the internal buffer and reset the size.
141 */
flush()142 void FossologyUnicodeClean::flush()
143 {
144 if (destinationFile && destinationFile.is_open())
145 {
146 for (size_t i = 0; i < this->buffer.size(); i++)
147 {
148 string temp;
149 buffer[i].toUTF8String(temp);
150 destinationFile << temp << "\n";
151 }
152 }
153 else
154 {
155 for (size_t i = 0; i < this->buffer.size(); i++)
156 {
157 string temp;
158 buffer[i].toUTF8String(temp);
159 cout << temp << "\n";
160 }
161 }
162 buffer.clear();
163 bufferSize = 0;
164 }
165
166 /**
167 * Parse the CLI options for the program.
168 * @param argc From main()
169 * @param argv From main()
170 * @param[out] input Input file path string (empty if not sent)
171 * @param[out] output Output file path string (empty if not sent)
172 * @return True if options parsed successfully, false otherwise
173 */
parseCliOptions(int argc,char ** argv,string & input,string & output)174 bool parseCliOptions(int argc, char **argv, string &input, string &output)
175 {
176 boost::program_options::options_description desc("fo_unicode_clean "
177 ": recognized options");
178 desc.add_options()
179 (
180 "help,h", "shows help"
181 )
182 (
183 "input,i",
184 boost::program_options::value<string>(),
185 "file to read"
186 )
187 (
188 "output,o",
189 boost::program_options::value<string>(),
190 "output file"
191 )
192 ;
193
194 boost::program_options::variables_map vm;
195
196 try
197 {
198 boost::program_options::store(
199 boost::program_options::command_line_parser(argc,
200 argv).options(desc).run(), vm);
201
202 if (vm.count("help") > 0)
203 {
204 cout << desc << endl;
205 cout << "If no input passed, read from STDIN." << endl;
206 cout << "If no output passed, print to STDOUT." << endl;
207 exit(0);
208 }
209
210 if (vm.count("input"))
211 {
212 input = vm["input"].as<string>();
213 }
214 if (vm.count("output"))
215 {
216 output = vm["output"].as<string>();
217 }
218 return true;
219 }
220 catch (boost::bad_any_cast&)
221 {
222 cout << "wrong parameter type" << endl;
223 cout << desc << endl;
224 return false;
225 }
226 catch (boost::program_options::error&)
227 {
228 cout << "wrong command line arguments" << endl;
229 cout << desc << endl;
230 return false;
231 }
232 }
233
main(int argc,char ** argv)234 int main(int argc, char **argv)
235 {
236 string input, output;
237 if (parseCliOptions(argc, argv, input, output))
238 {
239 FossologyUnicodeClean obj(input, output);
240 obj.startConvert();
241 return 0;
242 }
243 return -4;
244 }
245