1 // Copyright (C) 2015-2021 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 #include <util/versioned_csv_file.h>
9 #include <boost/scoped_ptr.hpp>
10 #include <gtest/gtest.h>
11 #include <fstream>
12 #include <sstream>
13 #include <string>
14 
15 #include <boost/algorithm/string/classification.hpp>
16 #include <boost/algorithm/string/constants.hpp>
17 #include <boost/algorithm/string/split.hpp>
18 
19 namespace {
20 
21 using namespace isc::util;
22 
23 /// @brief Test fixture class for testing operations on VersionedCSVFile.
24 ///
25 /// It implements basic operations on files, such as reading writing
26 /// file removal and checking presence of the file. This is used by
27 /// unit tests to verify correctness of the file created by the
28 /// CSVFile class.
29 class VersionedCSVFileTest : public ::testing::Test {
30 public:
31 
32     /// @brief Constructor.
33     ///
34     /// Sets the path to the CSV file used throughout the tests.
35     /// The name of the file is test.csv and it is located in the
36     /// current build folder.
37     ///
38     /// It also deletes any dangling files after previous tests.
39     VersionedCSVFileTest();
40 
41     /// @brief Destructor.
42     ///
43     /// Deletes the test CSV file if any.
44     virtual ~VersionedCSVFileTest();
45 
46     /// @brief Prepends the absolute path to the file specified
47     /// as an argument.
48     ///
49     /// @param filename Name of the file.
50     /// @return Absolute path to the test file.
51     static std::string absolutePath(const std::string& filename);
52 
53     /// @brief Check if test file exists on disk.
54     bool exists() const;
55 
56     /// @brief Reads whole CSV file.
57     ///
58     /// @return Contents of the file.
59     std::string readFile() const;
60 
61     /// @brief Removes existing file (if any).
62     int removeFile() const;
63 
64     /// @brief Creates file with contents.
65     ///
66     /// @param contents Contents of the file.
67     void writeFile(const std::string& contents) const;
68 
69     /// @brief Absolute path to the file used in the tests.
70     std::string testfile_;
71 
72 };
73 
VersionedCSVFileTest()74 VersionedCSVFileTest::VersionedCSVFileTest()
75     : testfile_(absolutePath("test.csv")) {
76     static_cast<void>(removeFile());
77 }
78 
~VersionedCSVFileTest()79 VersionedCSVFileTest::~VersionedCSVFileTest() {
80     static_cast<void>(removeFile());
81 }
82 
83 std::string
absolutePath(const std::string & filename)84 VersionedCSVFileTest::absolutePath(const std::string& filename) {
85     std::ostringstream s;
86     s << TEST_DATA_BUILDDIR << "/" << filename;
87     return (s.str());
88 }
89 
90 bool
exists() const91 VersionedCSVFileTest::exists() const {
92     std::ifstream fs(testfile_.c_str());
93     bool ok = fs.good();
94     fs.close();
95     return (ok);
96 }
97 
98 std::string
readFile() const99 VersionedCSVFileTest::readFile() const {
100     std::ifstream fs(testfile_.c_str());
101     if (!fs.is_open()) {
102         return ("");
103     }
104     std::string contents((std::istreambuf_iterator<char>(fs)),
105                          std::istreambuf_iterator<char>());
106     fs.close();
107     return (contents);
108 }
109 
110 int
removeFile() const111 VersionedCSVFileTest::removeFile() const {
112     return (remove(testfile_.c_str()));
113 }
114 
115 void
writeFile(const std::string & contents) const116 VersionedCSVFileTest::writeFile(const std::string& contents) const {
117     std::ofstream fs(testfile_.c_str(), std::ofstream::out);
118     if (fs.is_open()) {
119         fs << contents;
120         fs.close();
121     }
122 }
123 
124 // This test checks that the function which is used to add columns of the
125 // CSV file works as expected.
TEST_F(VersionedCSVFileTest,addColumn)126 TEST_F(VersionedCSVFileTest, addColumn) {
127     boost::scoped_ptr<VersionedCSVFile> csv(new VersionedCSVFile(testfile_));
128 
129     // Verify that we're not allowed to open it without the schema
130     ASSERT_THROW(csv->open(), VersionedCSVFileError);
131 
132     // Add two columns.
133     ASSERT_NO_THROW(csv->addColumn("animal", "1.0", ""));
134     ASSERT_NO_THROW(csv->addColumn("color", "2.0", "blue"));
135 
136     // Make sure we can't add duplicates.
137     EXPECT_THROW(csv->addColumn("animal", "1.0", ""), CSVFileError);
138     EXPECT_THROW(csv->addColumn("color", "2.0", "blue"), CSVFileError);
139 
140     // But we should still be able to add unique columns.
141     EXPECT_NO_THROW(csv->addColumn("age", "3.0", "21"));
142 
143     // Assert that the file is opened, because the rest of the test relies
144     // on this.
145     ASSERT_NO_THROW(csv->recreate());
146     ASSERT_TRUE(exists());
147 
148     // We should have 3 defined columns
149     // Input Header should match defined columns on new files
150     // Valid columns should match defined columns on new files
151     // Minimum valid columns wasn't set. (Remember it's optional)
152     EXPECT_EQ(3, csv->getColumnCount());
153     EXPECT_EQ(3, csv->getInputHeaderCount());
154     EXPECT_EQ(3, csv->getValidColumnCount());
155     EXPECT_EQ(0, csv->getMinimumValidColumns());
156 
157     // Schema versions for new files should always match
158     EXPECT_EQ("3.0", csv->getInputSchemaVersion());
159     EXPECT_EQ("3.0", csv->getSchemaVersion());
160 
161     // Input Schema State should be current for new files
162     EXPECT_EQ(VersionedCSVFile::CURRENT, csv->getInputSchemaState());
163     EXPECT_FALSE(csv->needsConversion());
164 
165     // Make sure we can't add columns (even unique) when the file is open.
166     ASSERT_THROW(csv->addColumn("zoo", "3.0", ""), CSVFileError);
167 
168     // Close the file.
169     ASSERT_NO_THROW(csv->close());
170     // And check that now it is possible to add the column.
171     EXPECT_NO_THROW(csv->addColumn("zoo", "3.0", ""));
172 }
173 
174 // Verifies that a current schema version file loads correctly.
TEST_F(VersionedCSVFileTest,currentSchemaTest)175 TEST_F(VersionedCSVFileTest, currentSchemaTest) {
176 
177     // Create our versioned file, with three columns
178     boost::scoped_ptr<VersionedCSVFile> csv(new VersionedCSVFile(testfile_));
179     ASSERT_NO_THROW(csv->addColumn("animal", "2.0", ""));
180     ASSERT_NO_THROW(csv->addColumn("color", "2.0", "grey"));
181     ASSERT_NO_THROW(csv->addColumn("age", "2.0", "0"));
182 
183     // Write a file compliant with the current schema version.
184     writeFile("animal,color,age\n"
185               "cat,black,2\n"
186               "lion,yellow,17\n"
187               "dog,brown,5\n");
188 
189     // Header should pass validation and allow the open to succeed.
190     ASSERT_NO_THROW(csv->open());
191 
192     // For schema current file We should have:
193     // 3 defined columns
194     // 3 columns total found in the header
195     // 3 valid columns found in the header
196     // Minimum valid columns wasn't set. (Remember it's optional)
197     EXPECT_EQ(3, csv->getColumnCount());
198     EXPECT_EQ(3, csv->getInputHeaderCount());
199     EXPECT_EQ(3, csv->getValidColumnCount());
200     EXPECT_EQ(0, csv->getMinimumValidColumns());
201 
202     // Input schema and current schema should both be  2.0
203     EXPECT_EQ("2.0", csv->getInputSchemaVersion());
204     EXPECT_EQ("2.0", csv->getSchemaVersion());
205 
206     // Input Schema State should be CURRENT
207     EXPECT_EQ(VersionedCSVFile::CURRENT, csv->getInputSchemaState());
208     EXPECT_FALSE(csv->needsConversion());
209 
210     // First row is correct.
211     CSVRow row;
212     ASSERT_TRUE(csv->next(row));
213     EXPECT_EQ("cat", row.readAt(0));
214     EXPECT_EQ("black", row.readAt(1));
215     EXPECT_EQ("2", row.readAt(2));
216 
217     // Second row is correct.
218     ASSERT_TRUE(csv->next(row));
219     EXPECT_EQ("lion", row.readAt(0));
220     EXPECT_EQ("yellow", row.readAt(1));
221     EXPECT_EQ("17", row.readAt(2));
222 
223     // Third row is correct.
224     ASSERT_TRUE(csv->next(row));
225     EXPECT_EQ("dog", row.readAt(0));
226     EXPECT_EQ("brown", row.readAt(1));
227     EXPECT_EQ("5", row.readAt(2));
228 }
229 
230 
231 // Verifies the basic ability to upgrade valid files.
232 // It starts with a version 1.0 file and updates
233 // it through two schema evolutions.
TEST_F(VersionedCSVFileTest,upgradeOlderVersions)234 TEST_F(VersionedCSVFileTest, upgradeOlderVersions) {
235 
236     // Create version 1.0 schema  CSV file
237     writeFile("animal\n"
238               "cat\n"
239               "lion\n"
240               "dog\n");
241 
242     // Create our versioned file, with two columns, one for each
243     // schema version
244     boost::scoped_ptr<VersionedCSVFile> csv(new VersionedCSVFile(testfile_));
245     ASSERT_NO_THROW(csv->addColumn("animal", "1.0", ""));
246     ASSERT_NO_THROW(csv->addColumn("color", "2.0", "blue"));
247 
248     // Header should pass validation and allow the open to succeed.
249     ASSERT_NO_THROW(csv->open());
250 
251     // We should have:
252     // 2 defined columns
253     // 1 column found in the header
254     // 1 valid column in the header
255     // Minimum valid columns wasn't set. (Remember it's optional)
256     EXPECT_EQ(2, csv->getColumnCount());
257     EXPECT_EQ(1, csv->getInputHeaderCount());
258     EXPECT_EQ(1, csv->getValidColumnCount());
259     EXPECT_EQ(0, csv->getMinimumValidColumns());
260 
261     // Input schema should be 1.0, while our current schema should be 2.0
262     EXPECT_EQ("1.0", csv->getInputSchemaVersion());
263     EXPECT_EQ("2.0", csv->getSchemaVersion());
264 
265     // Input Schema State should be NEEDS_UPGRADE
266     EXPECT_EQ(VersionedCSVFile::NEEDS_UPGRADE, csv->getInputSchemaState());
267     EXPECT_TRUE(csv->needsConversion());
268 
269     // First row is correct.
270     CSVRow row;
271     ASSERT_TRUE(csv->next(row));
272     EXPECT_EQ("cat", row.readAt(0));
273     EXPECT_EQ("blue", row.readAt(1));
274 
275     // Second row is correct.
276     ASSERT_TRUE(csv->next(row));
277     EXPECT_EQ("lion", row.readAt(0));
278     EXPECT_EQ("blue", row.readAt(1));
279 
280     // Third row is correct.
281     ASSERT_TRUE(csv->next(row));
282     EXPECT_EQ("dog", row.readAt(0));
283     EXPECT_EQ("blue", row.readAt(1));
284 
285     // Now, let's try to append something to this file.
286     CSVRow row_write(2);
287     row_write.writeAt(0, "bird");
288     row_write.writeAt(1, "yellow");
289     ASSERT_NO_THROW(csv->append(row_write));
290 
291     // Close the file
292     ASSERT_NO_THROW(csv->flush());
293     ASSERT_NO_THROW(csv->close());
294 
295 
296     // Check the file contents are correct.
297     EXPECT_EQ("animal\n"
298               "cat\n"
299               "lion\n"
300               "dog\n"
301               "bird,yellow\n",
302               readFile());
303 
304     // Create a third schema by adding a column
305     ASSERT_NO_THROW(csv->addColumn("age", "3.0", "21"));
306     ASSERT_EQ(3, csv->getColumnCount());
307 
308     // Header should pass validation and allow the open to succeed
309     ASSERT_NO_THROW(csv->open());
310 
311     // We should have:
312     // 3 defined columns
313     // 1 column found in the header
314     // 1 valid column in the header
315     // Minimum valid columns wasn't set. (Remember it's optional)
316     EXPECT_EQ(3, csv->getColumnCount());
317     EXPECT_EQ(1, csv->getInputHeaderCount());
318     EXPECT_EQ(1, csv->getValidColumnCount());
319     EXPECT_EQ(0, csv->getMinimumValidColumns());
320 
321     // Make sure schema versions are accurate
322     EXPECT_EQ("1.0", csv->getInputSchemaVersion());
323     EXPECT_EQ("3.0", csv->getSchemaVersion());
324 
325     // Input Schema State should be NEEDS_UPGRADE
326     EXPECT_EQ(VersionedCSVFile::NEEDS_UPGRADE, csv->getInputSchemaState());
327     EXPECT_TRUE(csv->needsConversion());
328 
329     // First row is correct.
330     ASSERT_TRUE(csv->next(row));
331     EXPECT_EQ("cat", row.readAt(0));
332     EXPECT_EQ("blue", row.readAt(1));
333     EXPECT_EQ("21", row.readAt(2));
334 
335     // Second row is correct.
336     ASSERT_TRUE(csv->next(row));
337     EXPECT_EQ("lion", row.readAt(0));
338     EXPECT_EQ("blue", row.readAt(1));
339     EXPECT_EQ("21", row.readAt(2));
340 
341     // Third row is correct.
342     ASSERT_TRUE(csv->next(row));
343     EXPECT_EQ("dog", row.readAt(0));
344     EXPECT_EQ("blue", row.readAt(1));
345     EXPECT_EQ("21", row.readAt(2));
346 
347     // Fourth row is correct.
348     ASSERT_TRUE(csv->next(row));
349     EXPECT_EQ("bird", row.readAt(0));
350     EXPECT_EQ("yellow", row.readAt(1));
351     EXPECT_EQ("21", row.readAt(2));
352 }
353 
// Verifies that the minimum number of valid columns, when set, is enforced
// during header validation and that a file meeting the minimum is readable.
TEST_F(VersionedCSVFileTest, minimumValidColumn) {
    // Create version 1.0 schema  CSV file
    writeFile("animal\n"
              "cat\n"
              "lion\n"
              "dog\n");

    // Create our versioned file, with three columns, one for each
    // schema version
    boost::scoped_ptr<VersionedCSVFile> csv(new VersionedCSVFile(testfile_));
    ASSERT_NO_THROW(csv->addColumn("animal", "1.0", ""));
    ASSERT_NO_THROW(csv->addColumn("color", "2.0", "blue"));
    ASSERT_NO_THROW(csv->addColumn("age", "3.0", "21"));

    // Verify we can't set minimum columns with a non-existent column
    EXPECT_THROW(csv->setMinimumValidColumns("bogus"), VersionedCSVFileError);

    // Set the minimum number of columns to "color". The call is guarded,
    // like the other setup calls in this file, so that an unexpected
    // exception is reported at this line.
    ASSERT_NO_THROW(csv->setMinimumValidColumns("color"));
    EXPECT_EQ(2, csv->getMinimumValidColumns());

    // Header validation should fail, too few columns
    ASSERT_THROW(csv->open(), CSVFileError);

    // Set the minimum number of columns to 1.  File should parse now.
    ASSERT_NO_THROW(csv->setMinimumValidColumns("animal"));
    EXPECT_EQ(1, csv->getMinimumValidColumns());
    ASSERT_NO_THROW(csv->open());

    // First row is correct.
    CSVRow row;
    ASSERT_TRUE(csv->next(row));
    EXPECT_EQ("cat", row.readAt(0));
    EXPECT_EQ("blue", row.readAt(1));
    EXPECT_EQ("21", row.readAt(2));

    // Second row is correct.
    ASSERT_TRUE(csv->next(row));
    EXPECT_EQ("lion", row.readAt(0));
    EXPECT_EQ("blue", row.readAt(1));
    EXPECT_EQ("21", row.readAt(2));

    // Third row is correct.
    ASSERT_TRUE(csv->next(row));
    EXPECT_EQ("dog", row.readAt(0));
    EXPECT_EQ("blue", row.readAt(1));
    EXPECT_EQ("21", row.readAt(2));
}
400 
// Checks that a header containing a misspelled column name is rejected.
TEST_F(VersionedCSVFileTest, invalidHeaderColumn) {

    // Define the version 2.0 schema.
    VersionedCSVFile file(testfile_);
    ASSERT_NO_THROW(file.addColumn("animal", "1.0", ""));
    ASSERT_NO_THROW(file.addColumn("color", "2.0", "blue"));

    // The number of columns is right, but the second name is not.
    writeFile("animal,colour\n"
              "cat,red\n"
              "lion,green\n");

    // The invalid column must make the header validation fail.
    ASSERT_THROW(file.open(), CSVFileError);
}
416 
// Verifies that a file with more columns than the current schema defines
// is opened, is flagged as needing a downgrade, and that rows read from it
// hold no data beyond the defined columns.
TEST_F(VersionedCSVFileTest, downGrading) {
    // Create our version 2.0 schema file
    boost::scoped_ptr<VersionedCSVFile> csv(new VersionedCSVFile(testfile_));
    ASSERT_NO_THROW(csv->addColumn("animal", "1.0", ""));
    ASSERT_NO_THROW(csv->addColumn("color", "2.0", "blue"));

    // Create schema 2.0 file PLUS an extra column
    writeFile("animal,color,age\n"
              "cat,red,5\n"
              "lion,green,8\n");

    // Header should validate and file should open.
    ASSERT_NO_THROW(csv->open());

    // We should have:
    // 2 defined columns
    // 3 columns found in the header
    // 2 valid columns in the header
    // Minimum valid columns wasn't set. (Remember it's optional)
    EXPECT_EQ(2, csv->getColumnCount());
    EXPECT_EQ(3, csv->getInputHeaderCount());
    EXPECT_EQ(2, csv->getValidColumnCount());
    EXPECT_EQ(0, csv->getMinimumValidColumns());

    // Input schema and current schema should both be 2.0
    EXPECT_EQ("2.0", csv->getInputSchemaVersion());
    EXPECT_EQ("2.0", csv->getSchemaVersion());

    // Input Schema State should be NEEDS_DOWNGRADE
    EXPECT_EQ(VersionedCSVFile::NEEDS_DOWNGRADE, csv->getInputSchemaState());
    EXPECT_TRUE(csv->needsConversion());

    // First row is correct. The read is asserted (not merely expected),
    // because the checks below access the row's values.
    CSVRow row;
    ASSERT_TRUE(csv->next(row));
    EXPECT_EQ("cat", row.readAt(0));
    EXPECT_EQ("red", row.readAt(1));

    // No data beyond the second column
    EXPECT_THROW(row.readAt(2), CSVFileError);

    // Second row is correct.
    ASSERT_TRUE(csv->next(row));
    EXPECT_EQ("lion", row.readAt(0));
    EXPECT_EQ("green", row.readAt(1));

    // No data beyond the second column
    EXPECT_THROW(row.readAt(2), CSVFileError);
}
466 
467 
// Verifies that rows holding too many or too few values are rejected while
// a valid row in between is still read correctly.
TEST_F(VersionedCSVFileTest, rowChecking) {
    // Create version 2.0 schema CSV file with a
    // - valid header
    // - row 0 has too many values
    // - row 1 is valid
    // - row 2 has too few values
    writeFile("animal,color\n"
              "cat,red,bogus_row_value\n"
              "lion,green\n"
              "too_few\n");

    // Create our versioned file, with two columns, one for each
    // schema version
    boost::scoped_ptr<VersionedCSVFile> csv(new VersionedCSVFile(testfile_));
    ASSERT_NO_THROW(csv->addColumn("animal", "1.0", ""));
    ASSERT_NO_THROW(csv->addColumn("color", "2.0", "blue"));

    // Header validation should pass, so we can open
    ASSERT_NO_THROW(csv->open());

    CSVRow row;
    // First row has too many
    EXPECT_FALSE(csv->next(row));

    // Second row is valid
    ASSERT_TRUE(csv->next(row));
    EXPECT_EQ("lion", row.readAt(0));
    EXPECT_EQ("green", row.readAt(1));

    // Third row has too few
    EXPECT_FALSE(csv->next(row));
}
500 
501 } // end of anonymous namespace
502