1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/metrics/demographics/user_demographics.h"
6 
7 #include <utility>
8 
9 #include "base/check.h"
10 #include "base/optional.h"
11 #include "base/rand_util.h"
12 #include "base/values.h"
13 #include "components/pref_registry/pref_registry_syncable.h"
14 #include "components/prefs/pref_service.h"
15 
16 namespace metrics {
17 
18 // Root dictionary pref that stores the user's birth year and gender (under
19 // kSyncDemographicsBirthYearPath and kSyncDemographicsGenderPath) as provided by
20 // the sync server. This is a read-only syncable priority pref sent to the client.
21 const char kSyncDemographicsPrefName[] = "sync.demographics";
22 
23 // Stores a "secret" offset that is used to randomize the birth year for metrics
24 // reporting. This value should not be logged to UMA directly; instead, it
25 // should be summed with the value at kSyncDemographicsBirthYearPath. This value
26 // is generated locally on the client the first time a user begins to merge birth
27 // year data into their UMA reports. The value is synced to the user's other
28 // devices so that the user consistently uses the same offset across
29 // login/logout events and after clearing their other browser data.
30 const char kSyncDemographicsBirthYearOffsetPrefName[] =
31     "sync.demographics_birth_year_offset";
32 
33 // This pref value is subordinate to the kSyncDemographicsPrefName dictionary
34 // pref and is synced to the client. It stores the self-reported birth year of
35 // the syncing user, as provided by the sync server. This value should not be
36 // logged to UMA directly; instead, it should be summed with the offset stored in
37 // kSyncDemographicsBirthYearOffsetPrefName.
38 const char kSyncDemographicsBirthYearPath[] = "birth_year";
39 
40 // This pref value is subordinate to the kSyncDemographicsPrefName dictionary
41 // pref and is synced to the client. It stores the self-reported gender of the
42 // syncing user, as provided by the sync server. The gender is encoded using the
43 // Gender enum defined in UserDemographicsProto
44 // (see third_party/metrics_proto/user_demographics.proto).
45 const char kSyncDemographicsGenderPath[] = "gender";
46 
47 namespace {
48 
49 // Gets an offset to add noise to the birth year. If not present in prefs, the
50 // offset will be randomly generated within the offset range and cached in
51 // syncable prefs.
GetBirthYearOffset(PrefService * pref_service)52 int GetBirthYearOffset(PrefService* pref_service) {
53   int offset =
54       pref_service->GetInteger(kSyncDemographicsBirthYearOffsetPrefName);
55   if (offset == kUserDemographicsBirthYearNoiseOffsetDefaultValue) {
56     // Generate a random offset when not cached in prefs.
57     offset = base::RandInt(-kUserDemographicsBirthYearNoiseOffsetRange,
58                            kUserDemographicsBirthYearNoiseOffsetRange);
59     pref_service->SetInteger(kSyncDemographicsBirthYearOffsetPrefName, offset);
60   }
61   return offset;
62 }
63 
64 // Determines whether the synced user has provided a birth year to Google which
65 // is eligible, once aggregated and anonymized, to measure usage of Chrome
66 // features by age groups. See doc of DemographicMetricsProvider in
67 // demographic_metrics_provider.h for more details.
HasEligibleBirthYear(base::Time now,int user_birth_year,int offset)68 bool HasEligibleBirthYear(base::Time now, int user_birth_year, int offset) {
69   // Compute user age.
70   base::Time::Exploded exploded_now_time;
71   now.LocalExplode(&exploded_now_time);
72   int user_age = exploded_now_time.year - (user_birth_year + offset);
73 
74   // Verify if the synced user's age has a population size in the age
75   // distribution of the society that is big enough to not raise the entropy of
76   // the demographics too much. At a certain point, as the age increase, the
77   // size of the population starts declining sharply as you can see in this
78   // approximate representation of the age distribution:
79   // |       ________         max age
80   // |______/        \_________ |
81   // |                          |\
82   // |                          | \
83   // +--------------------------|---------
84   //  0 10 20 30 40 50 60 70 80 90 100+
85   if (user_age > kUserDemographicsMaxAgeInYears)
86     return false;
87 
88   // Verify if the synced user is old enough. Use > rather than >= because we
89   // want to be sure that the user is at least |kUserDemographicsMinAgeInYears|
90   // without disclosing their birth date, which requires to add an extra year
91   // margin to the minimal age to be safe. For example, if we are in 2019-07-10
92   // (now) and the user was born in 1999-08-10, the user is not yet 20 years old
93   // (minimal age) but we cannot know that because we only have access to the
94   // year of the dates (2019 and 1999 respectively). If we make sure that the
95   // minimal age (computed at year granularity) is at least 21, we are 100% sure
96   // that the user will be at least 20 years old when providing the user’s birth
97   // year and gender.
98   return user_age > kUserDemographicsMinAgeInYears;
99 }
100 
101 // Gets the synced user's birth year from synced prefs, see doc of
102 // DemographicMetricsProvider in demographic_metrics_provider.h for more
103 // details.
GetUserBirthYear(const base::DictionaryValue * demographics)104 base::Optional<int> GetUserBirthYear(
105     const base::DictionaryValue* demographics) {
106   const base::Value* value =
107       demographics->FindPath(kSyncDemographicsBirthYearPath);
108   int birth_year = (value != nullptr && value->is_int())
109                        ? value->GetInt()
110                        : kUserDemographicsBirthYearDefaultValue;
111 
112   // Verify that there is a birth year.
113   if (birth_year == kUserDemographicsBirthYearDefaultValue)
114     return base::nullopt;
115 
116   return birth_year;
117 }
118 
119 // Gets the synced user's gender from synced prefs, see doc of
120 // DemographicMetricsProvider in demographic_metrics_provider.h for more
121 // details.
GetUserGender(const base::DictionaryValue * demographics)122 base::Optional<UserDemographicsProto_Gender> GetUserGender(
123     const base::DictionaryValue* demographics) {
124   const base::Value* value =
125       demographics->FindPath(kSyncDemographicsGenderPath);
126   int gender_int = (value != nullptr && value->is_int())
127                        ? value->GetInt()
128                        : kUserDemographicsGenderDefaultValue;
129 
130   // Verify that the gender is not default.
131   if (gender_int == kUserDemographicsGenderDefaultValue)
132     return base::nullopt;
133 
134   // Verify that the gender number is a valid UserDemographicsProto_Gender
135   // encoding.
136   if (!UserDemographicsProto_Gender_IsValid(gender_int))
137     return base::nullopt;
138 
139   auto gender = UserDemographicsProto_Gender(gender_int);
140 
141   // Verify that the gender is in a large enough population set to preserve
142   // anonymity.
143   if (gender != UserDemographicsProto::GENDER_FEMALE &&
144       gender != UserDemographicsProto::GENDER_MALE) {
145     return base::nullopt;
146   }
147 
148   return gender;
149 }
150 
151 }  // namespace
152 
153 // static
ForValue(UserDemographics value)154 UserDemographicsResult UserDemographicsResult::ForValue(
155     UserDemographics value) {
156   return UserDemographicsResult(std::move(value),
157                                 UserDemographicsStatus::kSuccess);
158 }
159 
160 // static
ForStatus(UserDemographicsStatus status)161 UserDemographicsResult UserDemographicsResult::ForStatus(
162     UserDemographicsStatus status) {
163   DCHECK(status != UserDemographicsStatus::kSuccess);
164   return UserDemographicsResult(UserDemographics(), status);
165 }
166 
IsSuccess() const167 bool UserDemographicsResult::IsSuccess() const {
168   return status_ == UserDemographicsStatus::kSuccess;
169 }
170 
status() const171 UserDemographicsStatus UserDemographicsResult::status() const {
172   return status_;
173 }
174 
value() const175 const UserDemographics& UserDemographicsResult::value() const {
176   return value_;
177 }
178 
UserDemographicsResult(UserDemographics value,UserDemographicsStatus status)179 UserDemographicsResult::UserDemographicsResult(UserDemographics value,
180                                                UserDemographicsStatus status)
181     : value_(std::move(value)), status_(status) {}
182 
RegisterDemographicsProfilePrefs(user_prefs::PrefRegistrySyncable * registry)183 void RegisterDemographicsProfilePrefs(
184     user_prefs::PrefRegistrySyncable* registry) {
185   registry->RegisterDictionaryPref(
186       kSyncDemographicsPrefName,
187       user_prefs::PrefRegistrySyncable::SYNCABLE_PRIORITY_PREF);
188   registry->RegisterIntegerPref(
189       kSyncDemographicsBirthYearOffsetPrefName,
190       kUserDemographicsBirthYearNoiseOffsetDefaultValue,
191       user_prefs::PrefRegistrySyncable::SYNCABLE_PREF);
192 }
193 
ClearDemographicsPrefs(PrefService * pref_service)194 void ClearDemographicsPrefs(PrefService* pref_service) {
195   // Clear user's birth year and gender.
196   // Note that we retain kSyncDemographicsBirthYearOffset. If the user resumes
197   // syncing, causing these prefs to be recreated, we don't want them to start
198   // reporting a different randomized birth year as this could narrow down or
199   // even reveal their true birth year.
200   pref_service->ClearPref(kSyncDemographicsPrefName);
201 }
202 
GetUserNoisedBirthYearAndGenderFromPrefs(base::Time now,PrefService * pref_service)203 UserDemographicsResult GetUserNoisedBirthYearAndGenderFromPrefs(
204     base::Time now,
205     PrefService* pref_service) {
206   // Verify that the now time is available. There are situations where the now
207   // time cannot be provided.
208   if (now.is_null()) {
209     return UserDemographicsResult::ForStatus(
210         UserDemographicsStatus::kCannotGetTime);
211   }
212 
213   // Get the synced user’s noised birth year and gender from synced prefs. Only
214   // one error status code should be used to represent the case where
215   // demographics are ineligible, see doc of UserDemographicsStatus in
216   // user_demographics.h for more details.
217 
218   // Get the pref that contains the user's birth year and gender.
219   const base::DictionaryValue* demographics =
220       pref_service->GetDictionary(kSyncDemographicsPrefName);
221   DCHECK(demographics != nullptr);
222 
223   // Get the user's birth year.
224   base::Optional<int> birth_year = GetUserBirthYear(demographics);
225   if (!birth_year.has_value()) {
226     return UserDemographicsResult::ForStatus(
227         UserDemographicsStatus::kIneligibleDemographicsData);
228   }
229 
230   // Get the user's gender.
231   base::Optional<UserDemographicsProto_Gender> gender =
232       GetUserGender(demographics);
233   if (!gender.has_value()) {
234     return UserDemographicsResult::ForStatus(
235         UserDemographicsStatus::kIneligibleDemographicsData);
236   }
237 
238   // Get the offset and do one last check that the birth year is eligible.
239   int offset = GetBirthYearOffset(pref_service);
240   if (!HasEligibleBirthYear(now, *birth_year, offset)) {
241     return UserDemographicsResult::ForStatus(
242         UserDemographicsStatus::kIneligibleDemographicsData);
243   }
244 
245   // Set gender and noised birth year in demographics.
246   UserDemographics user_demographics;
247   user_demographics.gender = *gender;
248   user_demographics.birth_year = *birth_year + offset;
249 
250   return UserDemographicsResult::ForValue(std::move(user_demographics));
251 }
252 
253 }  // namespace metrics
254