1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/metrics/demographics/user_demographics.h"
6
7 #include <utility>
8
9 #include "base/check.h"
10 #include "base/optional.h"
11 #include "base/rand_util.h"
12 #include "base/values.h"
13 #include "components/pref_registry/pref_registry_syncable.h"
14 #include "components/prefs/pref_service.h"
15
16 namespace metrics {
17
18 // Root dictionary pref to store the user's birth year and gender that are
19 // provided by the sync server. This is a read-only syncable priority pref, sent
20 // from the sync server to the client.
21 const char kSyncDemographicsPrefName[] = "sync.demographics";
22
23 // Stores a "secret" offset that is used to randomize the birth year for metrics
24 // reporting. This value should not be logged to UMA directly; instead, it
25 // should be summed with the birth year (kSyncDemographicsBirthYearPath). This
26 // value is generated locally on the client the first time a user begins to
27 // merge birth year data into their UMA reports. The value is synced to the
28 // user's other devices so that the user consistently uses the same offset
29 // across login/logout events and after clearing their other browser data.
30 const char kSyncDemographicsBirthYearOffsetPrefName[] =
31 "sync.demographics_birth_year_offset";
32
33 // This pref value is subordinate to the kSyncDemographicsPrefName dictionary
34 // pref and is synced to the client. It stores the self-reported birth year of
35 // the syncing user, as provided by the sync server. This value should not be
36 // logged to UMA directly; instead, it should be summed with the offset stored
37 // in kSyncDemographicsBirthYearOffsetPrefName.
38 const char kSyncDemographicsBirthYearPath[] = "birth_year";
39
40 // This pref value is subordinate to the kSyncDemographicsPrefName dictionary
41 // pref and is synced to the client. It stores the self-reported gender of the
42 // syncing user, as provided by the sync server. The gender is encoded using
43 // the Gender enum defined in UserDemographicsProto
44 // (see third_party/metrics_proto/user_demographics.proto).
45 const char kSyncDemographicsGenderPath[] = "gender";
46
47 namespace {
48
49 // Gets an offset to add noise to the birth year. If not present in prefs, the
50 // offset will be randomly generated within the offset range and cached in
51 // syncable prefs.
GetBirthYearOffset(PrefService * pref_service)52 int GetBirthYearOffset(PrefService* pref_service) {
53 int offset =
54 pref_service->GetInteger(kSyncDemographicsBirthYearOffsetPrefName);
55 if (offset == kUserDemographicsBirthYearNoiseOffsetDefaultValue) {
56 // Generate a random offset when not cached in prefs.
57 offset = base::RandInt(-kUserDemographicsBirthYearNoiseOffsetRange,
58 kUserDemographicsBirthYearNoiseOffsetRange);
59 pref_service->SetInteger(kSyncDemographicsBirthYearOffsetPrefName, offset);
60 }
61 return offset;
62 }
63
64 // Determines whether the synced user has provided a birth year to Google which
65 // is eligible, once aggregated and anonymized, to measure usage of Chrome
66 // features by age groups. See doc of DemographicMetricsProvider in
67 // demographic_metrics_provider.h for more details.
HasEligibleBirthYear(base::Time now,int user_birth_year,int offset)68 bool HasEligibleBirthYear(base::Time now, int user_birth_year, int offset) {
69 // Compute user age.
70 base::Time::Exploded exploded_now_time;
71 now.LocalExplode(&exploded_now_time);
72 int user_age = exploded_now_time.year - (user_birth_year + offset);
73
74 // Verify if the synced user's age has a population size in the age
75 // distribution of the society that is big enough to not raise the entropy of
76 // the demographics too much. At a certain point, as the age increase, the
77 // size of the population starts declining sharply as you can see in this
78 // approximate representation of the age distribution:
79 // | ________ max age
80 // |______/ \_________ |
81 // | |\
82 // | | \
83 // +--------------------------|---------
84 // 0 10 20 30 40 50 60 70 80 90 100+
85 if (user_age > kUserDemographicsMaxAgeInYears)
86 return false;
87
88 // Verify if the synced user is old enough. Use > rather than >= because we
89 // want to be sure that the user is at least |kUserDemographicsMinAgeInYears|
90 // without disclosing their birth date, which requires to add an extra year
91 // margin to the minimal age to be safe. For example, if we are in 2019-07-10
92 // (now) and the user was born in 1999-08-10, the user is not yet 20 years old
93 // (minimal age) but we cannot know that because we only have access to the
94 // year of the dates (2019 and 1999 respectively). If we make sure that the
95 // minimal age (computed at year granularity) is at least 21, we are 100% sure
96 // that the user will be at least 20 years old when providing the user’s birth
97 // year and gender.
98 return user_age > kUserDemographicsMinAgeInYears;
99 }
100
101 // Gets the synced user's birth year from synced prefs, see doc of
102 // DemographicMetricsProvider in demographic_metrics_provider.h for more
103 // details.
GetUserBirthYear(const base::DictionaryValue * demographics)104 base::Optional<int> GetUserBirthYear(
105 const base::DictionaryValue* demographics) {
106 const base::Value* value =
107 demographics->FindPath(kSyncDemographicsBirthYearPath);
108 int birth_year = (value != nullptr && value->is_int())
109 ? value->GetInt()
110 : kUserDemographicsBirthYearDefaultValue;
111
112 // Verify that there is a birth year.
113 if (birth_year == kUserDemographicsBirthYearDefaultValue)
114 return base::nullopt;
115
116 return birth_year;
117 }
118
119 // Gets the synced user's gender from synced prefs, see doc of
120 // DemographicMetricsProvider in demographic_metrics_provider.h for more
121 // details.
GetUserGender(const base::DictionaryValue * demographics)122 base::Optional<UserDemographicsProto_Gender> GetUserGender(
123 const base::DictionaryValue* demographics) {
124 const base::Value* value =
125 demographics->FindPath(kSyncDemographicsGenderPath);
126 int gender_int = (value != nullptr && value->is_int())
127 ? value->GetInt()
128 : kUserDemographicsGenderDefaultValue;
129
130 // Verify that the gender is not default.
131 if (gender_int == kUserDemographicsGenderDefaultValue)
132 return base::nullopt;
133
134 // Verify that the gender number is a valid UserDemographicsProto_Gender
135 // encoding.
136 if (!UserDemographicsProto_Gender_IsValid(gender_int))
137 return base::nullopt;
138
139 auto gender = UserDemographicsProto_Gender(gender_int);
140
141 // Verify that the gender is in a large enough population set to preserve
142 // anonymity.
143 if (gender != UserDemographicsProto::GENDER_FEMALE &&
144 gender != UserDemographicsProto::GENDER_MALE) {
145 return base::nullopt;
146 }
147
148 return gender;
149 }
150
151 } // namespace
152
153 // static
ForValue(UserDemographics value)154 UserDemographicsResult UserDemographicsResult::ForValue(
155 UserDemographics value) {
156 return UserDemographicsResult(std::move(value),
157 UserDemographicsStatus::kSuccess);
158 }
159
160 // static
ForStatus(UserDemographicsStatus status)161 UserDemographicsResult UserDemographicsResult::ForStatus(
162 UserDemographicsStatus status) {
163 DCHECK(status != UserDemographicsStatus::kSuccess);
164 return UserDemographicsResult(UserDemographics(), status);
165 }
166
IsSuccess() const167 bool UserDemographicsResult::IsSuccess() const {
168 return status_ == UserDemographicsStatus::kSuccess;
169 }
170
status() const171 UserDemographicsStatus UserDemographicsResult::status() const {
172 return status_;
173 }
174
value() const175 const UserDemographics& UserDemographicsResult::value() const {
176 return value_;
177 }
178
UserDemographicsResult(UserDemographics value,UserDemographicsStatus status)179 UserDemographicsResult::UserDemographicsResult(UserDemographics value,
180 UserDemographicsStatus status)
181 : value_(std::move(value)), status_(status) {}
182
RegisterDemographicsProfilePrefs(user_prefs::PrefRegistrySyncable * registry)183 void RegisterDemographicsProfilePrefs(
184 user_prefs::PrefRegistrySyncable* registry) {
185 registry->RegisterDictionaryPref(
186 kSyncDemographicsPrefName,
187 user_prefs::PrefRegistrySyncable::SYNCABLE_PRIORITY_PREF);
188 registry->RegisterIntegerPref(
189 kSyncDemographicsBirthYearOffsetPrefName,
190 kUserDemographicsBirthYearNoiseOffsetDefaultValue,
191 user_prefs::PrefRegistrySyncable::SYNCABLE_PREF);
192 }
193
ClearDemographicsPrefs(PrefService * pref_service)194 void ClearDemographicsPrefs(PrefService* pref_service) {
195 // Clear user's birth year and gender.
196 // Note that we retain kSyncDemographicsBirthYearOffset. If the user resumes
197 // syncing, causing these prefs to be recreated, we don't want them to start
198 // reporting a different randomized birth year as this could narrow down or
199 // even reveal their true birth year.
200 pref_service->ClearPref(kSyncDemographicsPrefName);
201 }
202
GetUserNoisedBirthYearAndGenderFromPrefs(base::Time now,PrefService * pref_service)203 UserDemographicsResult GetUserNoisedBirthYearAndGenderFromPrefs(
204 base::Time now,
205 PrefService* pref_service) {
206 // Verify that the now time is available. There are situations where the now
207 // time cannot be provided.
208 if (now.is_null()) {
209 return UserDemographicsResult::ForStatus(
210 UserDemographicsStatus::kCannotGetTime);
211 }
212
213 // Get the synced user’s noised birth year and gender from synced prefs. Only
214 // one error status code should be used to represent the case where
215 // demographics are ineligible, see doc of UserDemographicsStatus in
216 // user_demographics.h for more details.
217
218 // Get the pref that contains the user's birth year and gender.
219 const base::DictionaryValue* demographics =
220 pref_service->GetDictionary(kSyncDemographicsPrefName);
221 DCHECK(demographics != nullptr);
222
223 // Get the user's birth year.
224 base::Optional<int> birth_year = GetUserBirthYear(demographics);
225 if (!birth_year.has_value()) {
226 return UserDemographicsResult::ForStatus(
227 UserDemographicsStatus::kIneligibleDemographicsData);
228 }
229
230 // Get the user's gender.
231 base::Optional<UserDemographicsProto_Gender> gender =
232 GetUserGender(demographics);
233 if (!gender.has_value()) {
234 return UserDemographicsResult::ForStatus(
235 UserDemographicsStatus::kIneligibleDemographicsData);
236 }
237
238 // Get the offset and do one last check that the birth year is eligible.
239 int offset = GetBirthYearOffset(pref_service);
240 if (!HasEligibleBirthYear(now, *birth_year, offset)) {
241 return UserDemographicsResult::ForStatus(
242 UserDemographicsStatus::kIneligibleDemographicsData);
243 }
244
245 // Set gender and noised birth year in demographics.
246 UserDemographics user_demographics;
247 user_demographics.gender = *gender;
248 user_demographics.birth_year = *birth_year + offset;
249
250 return UserDemographicsResult::ForValue(std::move(user_demographics));
251 }
252
253 } // namespace metrics
254