1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "data_manager/dataset_reader.h"
31 
32 #include <sstream>
33 #include <string>
34 
35 #include "base/port.h"
36 #include "base/util.h"
37 #include "data_manager/dataset_writer.h"
38 #include "testing/base/public/gunit.h"
39 
40 namespace mozc {
41 namespace {
42 
GenerateRandomBytes(size_t len)43 string GenerateRandomBytes(size_t len) {
44   string s;
45   s.resize(len);
46   for (size_t i = 0; i < len; ++i) {
47     s[i] = static_cast<char>(Util::Random(256));
48   }
49   return s;
50 }
51 
// Returns the 6-byte magic number shared by the tests in this file.  The value
// has a NUL in the middle, so it is built from an explicit byte array with an
// explicit length rather than from a NUL-terminated C string.
string GetTestMagicNumber() {
  static const char kMagicBytes[] = {'m', 'a', '\0', 'g', 'i', 'c'};
  return string(kMagicBytes, sizeof(kMagicBytes));
}
55 
// Round trip: an image produced by DataSetWriter must pass checksum
// verification, be accepted by Init(), and hand back each stored value
// unchanged.  Unknown keys must be reported as missing.
TEST(DataSetReaderTest, ValidData) {
  const StringPiece kGoogle("GOOGLE");
  const StringPiece kMozc("m\0zc\xEF", 5);  // 5 bytes including the NUL.

  // Build the image inside a lambda so that the writer and the stream are
  // destroyed before the reader is exercised.
  const string image = [&]() {
    DataSetWriter writer(GetTestMagicNumber());
    writer.Add("google", 16, kGoogle);
    writer.Add("mozc", 64, kMozc);
    std::stringstream stream;
    writer.Finish(&stream);
    return stream.str();
  }();

  DataSetReader reader;
  ASSERT_TRUE(DataSetReader::VerifyChecksum(image));
  ASSERT_TRUE(reader.Init(image, GetTestMagicNumber()));

  // Registered keys are found and yield the original bytes.
  StringPiece found;
  EXPECT_TRUE(reader.Get("google", &found));
  EXPECT_EQ(kGoogle, found);
  EXPECT_TRUE(reader.Get("mozc", &found));
  EXPECT_EQ(kMozc, found);

  // Keys that were never added are not found.
  EXPECT_FALSE(reader.Get("", &found));
  EXPECT_FALSE(reader.Get("foo", &found));
}
81 
// Init() must reject inputs that do not carry the expected magic number.
TEST(DataSetReaderTest, InvalidMagicString) {
  const string magic = GetTestMagicNumber();
  DataSetReader reader;

  // None of these inputs contains the test magic number.
  const char *const kBogusImages[] = {
      "",
      "abc",
      "this is a text file",
  };
  for (const char *bogus_image : kBogusImages) {
    EXPECT_FALSE(reader.Init(bogus_image, magic));
  }
}
89 
// Both VerifyChecksum() and Init() must reject images whose trailing metadata
// size field is inconsistent with the image, or whose metadata chunk cannot be
// parsed.
TEST(DataSetReaderTest, BrokenMetadata) {
  const string magic = GetTestMagicNumber();
  DataSetReader reader;

  // The image consists of the magic number alone; there is no metadata.
  EXPECT_FALSE(DataSetReader::VerifyChecksum(magic));
  EXPECT_FALSE(reader.Init(magic, magic));

  // Every case below has the layout: magic + body + serialized size field.
  const string body = "content and metadata";
  const auto make_image = [&magic, &body](const string &size_field) {
    return magic + body + size_field;
  };

  // Declared metadata size of 0 is too small.
  string image = make_image(Util::SerializeUint64(0));
  EXPECT_FALSE(DataSetReader::VerifyChecksum(image));
  EXPECT_FALSE(reader.Init(image, magic));

  // Declared metadata size of 4 is still too small.
  image = make_image(Util::SerializeUint64(4));
  EXPECT_FALSE(DataSetReader::VerifyChecksum(image));
  EXPECT_FALSE(reader.Init(image, magic));

  // Declared metadata size is too large.
  image = make_image(Util::SerializeUint64(kuint64max));
  EXPECT_FALSE(DataSetReader::VerifyChecksum(image));
  EXPECT_FALSE(reader.Init(image, magic));

  // The declared size covers the whole body, but the body is not a serialized
  // protobuf.
  image = make_image(Util::SerializeUint64(body.size()));
  EXPECT_FALSE(DataSetReader::VerifyChecksum(image));
  EXPECT_FALSE(reader.Init(image, magic));
}
126 
// Images whose metadata parses as a DataSetMetadata message but describes an
// entry with an invalid offset or size must be rejected by both
// VerifyChecksum() and Init().
TEST(DataSetReaderTest, BrokenMetadataFields) {
  const string magic = GetTestMagicNumber();
  const StringPiece kGoogle("GOOGLE");
  const StringPiece kMozc("m\0zc\xEF", 5);

  // Write a regular image, then cut off its tail so that each case below can
  // attach its own metadata chunk.
  string content;
  {
    DataSetWriter writer(magic);
    writer.Add("google", 16, kGoogle);
    writer.Add("mozc", 64, kMozc);
    std::stringstream stream;
    writer.Finish(&stream);

    content = stream.str();
    // Drop the serialized metadata plus 8 more bytes from the end.
    const size_t metadata_size = writer.metadata().SerializeAsString().size();
    const size_t tail_size = metadata_size + 8;
    content.erase(content.size() - tail_size);
  }

  // Appends the serialized |metadata| and its serialized size to |content|.
  const auto make_image = [&content](const DataSetMetadata &metadata) {
    const string serialized = metadata.SerializeAsString();
    return content + serialized + Util::SerializeUint64(serialized.size());
  };

  {
    // Create an image with broken metadata.
    DataSetMetadata metadata;
    auto *entry = metadata.add_entries();
    entry->set_name("google");
    entry->set_offset(content.size() + 3);  // Invalid offset
    entry->set_size(kGoogle.size());
    const string image = make_image(metadata);

    DataSetReader reader;
    EXPECT_FALSE(DataSetReader::VerifyChecksum(image));
    EXPECT_FALSE(reader.Init(image, magic));
  }
  {
    // Create an image with broken size.
    DataSetMetadata metadata;
    auto *entry = metadata.add_entries();
    entry->set_name("google");
    entry->set_offset(content.size());
    entry->set_size(kuint64max);  // Too big size
    const string image = make_image(metadata);

    DataSetReader reader;
    EXPECT_FALSE(DataSetReader::VerifyChecksum(image));
    EXPECT_FALSE(reader.Init(image, magic));
  }
}
177 
// Builds a random data set image and checks that flipping any single bit of it
// is detected by VerifyChecksum() or, failing that, by Init().
TEST(DataSetReaderTest, OneBitError) {
  const char *const kMagic = "Dummy magic number\r\n";

  // Create an image holding ten entries with random alignment and content.
  string image;
  {
    const int kAlignments[] = {8, 16, 32, 64};
    DataSetWriter writer(kMagic);
    for (int n = 0; n < 10; ++n) {
      writer.Add(Util::StringPrintf("key%d", n),
                 kAlignments[Util::Random(4)],
                 GenerateRandomBytes(1 + Util::Random(1024)));
    }
    std::stringstream stream;
    writer.Finish(&stream);
    image = stream.str();
  }

  // The untouched image must be valid.
  DataSetReader reader;
  ASSERT_TRUE(DataSetReader::VerifyChecksum(image));
  ASSERT_TRUE(reader.Init(image, kMagic));

  // Corrupt one bit at a time, over every byte of the image.
  for (char &byte : image) {
    const char original = byte;
    for (int bit = 0; bit < 8; ++bit) {
      byte = original ^ (1 << bit);  // Flip the (bit + 1)-th bit.

      // The checksum does not cover the last 8 bytes (where the file size is
      // stored), so an error there cannot be caught by VerifyChecksum().  In
      // that case Init() is expected to fail because of the file size
      // mismatch.  Hence the image must never pass both checks.
      const bool accepted = DataSetReader::VerifyChecksum(image) &&
                            reader.Init(image, kMagic);
      EXPECT_FALSE(accepted);
    }
    byte = original;  // Restore the byte before moving on.
  }
}
216 
217 }  // namespace
218 }  // namespace mozc
219