1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "usage_stats/usage_stats_uploader.h"
31 
#include <limits>
#include <utility>
#include <vector>
34 
35 #ifdef OS_ANDROID
36 #include "base/android_util.h"
37 #endif  // OS_ANDROID
38 #include "base/clock.h"
39 #include "base/encryptor.h"
40 #include "base/mutex.h"
41 #include "base/port.h"
42 #include "base/singleton.h"
43 #include "base/system_util.h"
44 #include "base/util.h"
45 #include "base/version.h"
46 #include "config/config_handler.h"
47 #include "config/stats_config_util.h"
48 #include "storage/registry.h"
49 #include "usage_stats/upload_util.h"
50 #include "usage_stats/usage_stats.h"
51 #include "usage_stats/usage_stats.pb.h"
52 #include "usage_stats/usage_stats_updater.h"
53 
54 namespace mozc {
55 namespace usage_stats {
56 
57 namespace {
// Prefix prepended to every registry key built in this file.
const char kRegistryPrefix[] = "usage_stats.";
// Key suffix under which Send() stores the time (in seconds) of the last
// upload.
const char kLastUploadKey[] = "last_upload";
// Key suffix under which Send() stores the Mozc version that was current at
// the last upload; a version change makes Send() discard collected stats.
const char kMozcVersionKey[] = "mozc_version";
// Key suffix under which the encrypted client id is stored.
const char kClientIdKey[] = "client_id";
// Minimum number of seconds that must have elapsed since the last upload
// before Send() uploads again.
const uint32 kSendInterval = 23 * 60 * 60;  // 23 hours
63 
64 #include "usage_stats/usage_stats_list.h"
65 
66 // creates randomly generated new client id and insert it to registry
CreateAndInsertClientId(string * output)67 void CreateAndInsertClientId(string *output) {
68   DCHECK(output);
69   const size_t kClientIdSize = 16;
70   char rand_str[kClientIdSize + 1];
71   Util::GetRandomAsciiSequence(rand_str, sizeof(rand_str));
72   rand_str[kClientIdSize] = '\0';
73   *output = rand_str;
74 
75   string encrypted;
76   if (!Encryptor::ProtectData(*output, &encrypted)) {
77     LOG(ERROR) << "cannot encrypt client_id";
78     return;
79   }
80   const string key = string(kRegistryPrefix) + string(kClientIdKey);
81   if (!storage::Registry::Insert(key, encrypted)) {
82     LOG(ERROR) << "cannot insert client_id to registry";
83     return;
84   }
85   return;
86 }
87 
88 // default implementation for client id
89 // this refers the registry and returns stored id if the id is found.
90 class ClientIdImpl : public ClientIdInterface {
91  public:
92   ClientIdImpl();
93   virtual ~ClientIdImpl();
94   void GetClientId(string *output);
95 };
96 
ClientIdImpl()97 ClientIdImpl::ClientIdImpl() {}
98 
~ClientIdImpl()99 ClientIdImpl::~ClientIdImpl() {}
100 
GetClientId(string * output)101 void ClientIdImpl::GetClientId(string *output) {
102   DCHECK(output);
103   string encrypted;
104   const string key = string(kRegistryPrefix) + string(kClientIdKey);
105   if (!storage::Registry::Lookup(key, &encrypted)) {
106     LOG(ERROR) << "clientid lookup failed";
107     CreateAndInsertClientId(output);
108     return;
109   }
110   if (!Encryptor::UnprotectData(encrypted, output)) {
111     LOG(ERROR) << "unprotect clientid failed";
112     CreateAndInsertClientId(output);
113     return;
114   }
115 
116   // lookup and unprotect succeeded
117   return;
118 }
119 
120 ClientIdInterface *g_client_id_handler = NULL;
121 Mutex g_mutex;  // NOLINT
122 
GetClientIdHandler()123 ClientIdInterface &GetClientIdHandler() {
124   scoped_lock l(&g_mutex);
125   if (g_client_id_handler == NULL) {
126     return *(Singleton<ClientIdImpl>::get());
127   } else {
128     return *g_client_id_handler;
129   }
130 }
131 
AddDoubleValueStatsToUploadUtil(const string & key_name_base,const Stats::DoubleValueStats & double_stats,double average_scale,double variance_scale,UploadUtil * uploader)132 void AddDoubleValueStatsToUploadUtil(
133     const string &key_name_base,
134     const Stats::DoubleValueStats &double_stats,
135     double average_scale, double variance_scale,
136     UploadUtil *uploader) {
137   if (double_stats.num() == 0) {
138     return;
139   }
140   double average = double_stats.total() / double_stats.num();
141   double variance =
142       double_stats.square_total() / double_stats.num() - average * average;
143 
144   uploader->AddIntegerValue(key_name_base + "a",
145                             static_cast<int>(average * average_scale));
146   uploader->AddIntegerValue(key_name_base + "v",
147                             static_cast<int>(variance * variance_scale));
148 }
149 
AddVirtualKeyboardStatsToUploadUtil(const Stats & stats,UploadUtil * uploader)150 void AddVirtualKeyboardStatsToUploadUtil(const Stats &stats,
151                                          UploadUtil *uploader) {
152   DCHECK(stats.type() == Stats::VIRTUAL_KEYBOARD);
153 
154   string stats_name = stats.name();
155   // Change stats name to reduce network traffic
156   if (stats_name == "VirtualKeyboardStats") {
157     stats_name = "vks";
158   } else if (stats_name == "VirtualKeyboardMissStats") {
159     stats_name = "vkms";
160   } else {
161     LOG(ERROR) << "Unexpected stats_name: " << stats_name;
162     return;
163   }
164 
165   for (size_t i = 0; i < stats.virtual_keyboard_stats_size(); ++i) {
166     const Stats::VirtualKeyboardStats &virtual_keyboard_stats =
167         stats.virtual_keyboard_stats(i);
168 
169     // Set the keyboard_id
170     // example:
171     //  vks_name_TWELVE_KEY_TOGGLE_FLICK_KANA : 0
172     //  vks_name_TWELVE_KEY_TOGGLE_KANA : 1
173     //  vks_name_TWELVE_KEY_TOGGLE_NUMBER : 2
174     //  vkms_name_TWELVE_KEY_TOGGLE_FLICK_KANA : 0
175     //  vkms_name_TWELVE_KEY_TOGGLE_NUMBER : 1
176     uploader->AddIntegerValue(stats_name + "_name_" +
177                               virtual_keyboard_stats.keyboard_name(),
178                               i);
179     // Set the average and the variance of each stat.
180     // example:
181     //  vks_1_3_sxa (VirtualKeyboardStats_StartXAverage_keyboard1_sourceid3)
182     //  ^^^ | | ||| : vks(VirtualKeyboardStats), vkms(VirtualKeyboardMissStats)
183     //      ^ | ||| : keyboad_id
184     //        ^ ||| : source_id
185     //          ^^| : sx(StartX), sx(StartY), dx(DirectionX), dy(DirectionY),
186     //            |   tl(TimeLength)
187     //            ^ : a(Average), v(Variance)
188     for (size_t j = 0; j < virtual_keyboard_stats.touch_event_stats_size();
189          ++j) {
190       // Calculate average and variance
191       //   Average = total / num
192       //   Variance = square_total / num - (total / num) ^ 2
193       // Because the current log analysis system can only deal with int values,
194       // we multiply these values by a scale factor and send them to server.
195       //   sxa, sya, dxa, dya : scale = 10000000
196       //   sxv, syv, dxv, dyv : scale = 10000000
197       //   tla, tlv : scale = 10000000
198       const Stats::TouchEventStats &touch =
199           virtual_keyboard_stats.touch_event_stats(j);
200       const string key_name_base = Util::StringPrintf(
201           "%s_%d_%d_", stats_name.c_str(), static_cast<int>(i),
202           touch.source_id());
203 
204       AddDoubleValueStatsToUploadUtil(key_name_base + "sx",
205                                       touch.start_x_stats(),
206                                       10000000.0, 10000000.0,
207                                       uploader);
208       AddDoubleValueStatsToUploadUtil(key_name_base + "sy",
209                                       touch.start_y_stats(),
210                                       10000000.0, 10000000.0,
211                                       uploader);
212       AddDoubleValueStatsToUploadUtil(key_name_base + "dx",
213                                       touch.direction_x_stats(),
214                                       10000000.0, 10000000.0,
215                                       uploader);
216       AddDoubleValueStatsToUploadUtil(key_name_base + "dy",
217                                       touch.direction_y_stats(),
218                                       10000000.0, 10000000.0,
219                                       uploader);
220       AddDoubleValueStatsToUploadUtil(key_name_base + "tl",
221                                       touch.time_length_stats(),
222                                       10000000.0, 10000000.0,
223                                       uploader);
224     }
225   }
226 }
227 
228 }  // namespace
229 
// Out-of-line definition of the ClientIdInterface constructor; it does
// nothing.
ClientIdInterface::ClientIdInterface() {}

// Out-of-line definition of the ClientIdInterface destructor; it does
// nothing.
ClientIdInterface::~ClientIdInterface() {}
233 
// Default scheduling parameters of UsageStatsUploader. The durations noted
// below imply that the values are expressed in milliseconds.
// NOTE(review): their consumer is not visible in this file; presumably the
// scheduler job that invokes Send() -- confirm against the caller.

// 1 min
const uint32 UsageStatsUploader::kDefaultSchedulerDelay = 60*1000;
// 5 min
const uint32 UsageStatsUploader::kDefaultSchedulerRandomDelay = 5*60*1000;
// 5 min
const uint32 UsageStatsUploader::kDefaultScheduleInterval = 5*60*1000;
// 2 hours
const uint32 UsageStatsUploader::kDefaultScheduleMaxInterval = 2*60*60*1000;
242 
SetClientIdHandler(ClientIdInterface * client_id_handler)243 void UsageStatsUploader::SetClientIdHandler(
244     ClientIdInterface *client_id_handler) {
245   scoped_lock l(&g_mutex);
246   g_client_id_handler = client_id_handler;
247 }
248 
LoadStats(UploadUtil * uploader)249 void UsageStatsUploader::LoadStats(UploadUtil *uploader) {
250   DCHECK(uploader);
251   string stats_str;
252   Stats stats;
253   for (size_t i = 0; i < arraysize(kStatsList); ++i) {
254     const string key = string(kRegistryPrefix) + string(kStatsList[i]);
255     if (!storage::Registry::Lookup(key, &stats_str)) {
256       VLOG(4) << "stats not found: " << kStatsList[i];
257       continue;
258     }
259     if (!stats.ParseFromString(stats_str)) {
260       LOG(ERROR) << "ParseFromString failed.";
261       continue;
262     }
263     const string &name = stats.name();
264     switch (stats.type()) {
265       case Stats::COUNT:
266         DCHECK(stats.has_count()) << name;
267         uploader->AddCountValue(name, stats.count());
268         break;
269       case Stats::TIMING:
270         DCHECK(stats.has_num_timings()) << name;
271         DCHECK(stats.has_avg_time()) << name;
272         DCHECK(stats.has_min_time()) << name;
273         DCHECK(stats.has_max_time()) << name;
274         uploader->AddTimingValue(name, stats.num_timings(), stats.avg_time(),
275                                  stats.min_time(), stats.max_time());
276         break;
277       case Stats::INTEGER:
278         DCHECK(stats.has_int_value()) << name;
279         uploader->AddIntegerValue(name, stats.int_value());
280         break;
281       case Stats::BOOLEAN:
282         DCHECK(stats.has_boolean_value()) << name;
283         uploader->AddBooleanValue(name, stats.boolean_value());
284         break;
285       case Stats::VIRTUAL_KEYBOARD:
286         AddVirtualKeyboardStatsToUploadUtil(stats, uploader);
287         break;
288       default:
289         VLOG(3) << "stats " << name << " has no type";
290         break;
291     }
292   }
293 }
294 
GetClientId(string * output)295 void UsageStatsUploader::GetClientId(string *output) {
296   GetClientIdHandler().GetClientId(output);
297 }
298 
Send(void * data)299 bool UsageStatsUploader::Send(void *data) {
300   const string upload_key = string(kRegistryPrefix) + kLastUploadKey;
301   const uint32 current_sec = static_cast<uint32>(Clock::GetTime());
302   uint32 last_upload_sec = 0;
303   const string mozc_version_key = string(kRegistryPrefix) + kMozcVersionKey;
304   const string &current_mozc_version = Version::GetMozcVersion();
305   string last_mozc_version;
306   if (!storage::Registry::Lookup(upload_key, &last_upload_sec) ||
307       last_upload_sec > current_sec ||
308       !storage::Registry::Lookup(mozc_version_key, &last_mozc_version) ||
309       last_mozc_version != current_mozc_version) {
310     // quit here just saving current time and clear stats
311     UsageStats::ClearStats();
312     bool result = true;
313     result &= storage::Registry::Insert(upload_key, current_sec);
314     result &= storage::Registry::Insert(mozc_version_key, current_mozc_version);
315 
316     LOG_IF(ERROR, !result) << "cannot save usage stats metadata to registry";
317     return result;
318   }
319 
320   // if usage stats is disabled, we simply clear stats here.
321   if (!mozc::config::StatsConfigUtil::IsEnabled()) {
322     UsageStats::ClearStats();
323     VLOG(1) << "UsageStats is disabled.";
324     return false;
325   }
326 
327   uint32 elapsed_sec = 0;
328   if (current_sec >= last_upload_sec) {
329     elapsed_sec = current_sec - last_upload_sec;
330   } else {
331     LOG(WARNING) << "Timing counter seems to be reversed."
332                  << " current_sec: " << current_sec
333                  << " last_upload_sec: " << last_upload_sec;
334   }
335 
336   if (elapsed_sec < kSendInterval) {
337     VLOG(1) << "Skip uploading the data as not enough time has passed yet."
338             << " current_sec: " << current_sec
339             << " last_upload_sec: " << last_upload_sec
340             << " elapsed_sec: " << elapsed_sec
341             << " kSendInterval: " << kSendInterval;
342     return false;
343   }
344 
345   std::vector<std::pair<string, string> > params;
346   params.push_back(std::make_pair("hl", "ja"));
347   params.push_back(std::make_pair("v", Version::GetMozcVersion()));
348   string client_id;
349   GetClientId(&client_id);
350   DCHECK(!client_id.empty());
351   params.push_back(std::make_pair("client_id", client_id));
352   params.push_back(std::make_pair("os_ver", SystemUtil::GetOSVersionString()));
353 #ifdef OS_ANDROID
354   params.push_back(
355       std::make_pair("model",
356                      AndroidUtil::GetSystemProperty(
357                          AndroidUtil::kSystemPropertyModel, "Unknown")));
358 #endif  // OS_ANDROID
359 
360   {
361     config::Config config;
362     config::ConfigHandler::GetConfig(&config);
363     UsageStatsUpdater::UpdateStats(config);
364   }
365 
366   UploadUtil uploader;
367   uploader.SetHeader("Daily", elapsed_sec, params);
368 #if defined(OS_NACL) || defined(OS_ANDROID)
369   // We use HTTPS to send usage stats in the following platforms:
370   // - NaCl: We use HTTPS to follow Chrome OS convention.
371   // https://code.google.com/p/chromium/issues/detail?id=255327
372   // - Android: We use HTTPS to follow Android recommendation.
373   // https://developer.android.com/guide/topics/manifest/application-element.html#usesCleartextTraffic
374   uploader.SetUseHttps(true);
375 #endif  // OS_NACL
376   LoadStats(&uploader);
377 
378   // Just check for confirming that we can insert the value to upload_key.
379   if (!storage::Registry::Insert(upload_key, last_upload_sec)) {
380     LOG(ERROR) << "cannot save to registry";
381     return false;
382   }
383   if (!uploader.Upload()) {
384     LOG(ERROR) << "usagestats upload failed";
385     UsageStats::IncrementCount("UsageStatsUploadFailed");
386     return false;
387   }
388   UsageStats::ClearStats();
389 
390   // Actual insersion to upload_key
391   if (!storage::Registry::Insert(upload_key, current_sec)) {
392     // This case is the worst case, but we will not send the data
393     // at the next trial, because next checking for insertion to upload_key
394     // should be failed.
395     LOG(ERROR) << "cannot save current_time to registry";
396     return false;
397   }
398   if (!UsageStats::Sync()) {
399     LOG(ERROR) << "Failed to sync cleared usage stats to disk storage.";
400     return false;
401   }
402 
403   VLOG(2) << "send success";
404   return true;
405 }
406 
407 }  // namespace usage_stats
408 }  // namespace mozc
409