1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "ui/accessibility/ax_language_detection.h"
6 #include <algorithm>
7 #include <functional>
8 
9 #include "base/command_line.h"
10 #include "base/i18n/unicodestring.h"
11 #include "base/metrics/histogram_functions.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "base/trace_event/trace_event.h"
15 #include "ui/accessibility/accessibility_features.h"
16 #include "ui/accessibility/accessibility_switches.h"
17 #include "ui/accessibility/ax_enums.mojom.h"
18 #include "ui/accessibility/ax_tree.h"
19 
20 namespace ui {
21 
namespace {

// Upper bound on the number of languages considered per page: a node is only
// ever labelled with one of the page's |kMaxDetectedLanguagesPerPage| highest
// scoring languages.
constexpr int kMaxDetectedLanguagesPerPage = 3;

// Upper bound on the number of languages cld3 is asked to detect for each
// input we give it. The ML team recommended 3 as a good starting point.
constexpr int kMaxDetectedLanguagesPerSpan = 3;

// Byte length bounds handed to the short text language identifier.
// TODO(https://crbug.com/971360): Determine appropriate value for
// |kShortTextIdentifierMaxByteLength|.
constexpr int kShortTextIdentifierMinByteLength = 1;
constexpr int kShortTextIdentifierMaxByteLength = 1000;

}  // namespace
37 
38 using Result = chrome_lang_id::NNetLanguageIdentifier::Result;
39 using SpanInfo = chrome_lang_id::NNetLanguageIdentifier::SpanInfo;
40 
41 AXLanguageInfo::AXLanguageInfo() = default;
42 AXLanguageInfo::~AXLanguageInfo() = default;
43 
AXLanguageInfoStats()44 AXLanguageInfoStats::AXLanguageInfoStats()
45     : top_results_valid_(false),
46       disable_metric_clearing_(false),
47       count_detection_attempted_(0),
48       count_detection_results_(0),
49       count_labelled_(0),
50       count_labelled_with_top_result_(0),
51       count_overridden_(0) {}
52 
53 AXLanguageInfoStats::~AXLanguageInfoStats() = default;
54 
Add(const std::vector<std::string> & languages)55 void AXLanguageInfoStats::Add(const std::vector<std::string>& languages) {
56   // Count this as a successful detection with results.
57   ++count_detection_results_;
58 
59   // Assign languages with higher probability a higher score.
60   // TODO(chrishall): consider more complex scoring
61   int score = kMaxDetectedLanguagesPerSpan;
62   for (const auto& lang : languages) {
63     lang_counts_[lang] += score;
64 
65     // Record the highest scoring detected languages for each node.
66     if (score == kMaxDetectedLanguagesPerSpan)
67       unique_top_lang_detected_.insert(lang);
68 
69     --score;
70   }
71 
72   InvalidateTopResults();
73 }
74 
GetScore(const std::string & lang) const75 int AXLanguageInfoStats::GetScore(const std::string& lang) const {
76   const auto& lang_count_it = lang_counts_.find(lang);
77   if (lang_count_it == lang_counts_.end()) {
78     return 0;
79   }
80   return lang_count_it->second;
81 }
82 
InvalidateTopResults()83 void AXLanguageInfoStats::InvalidateTopResults() {
84   top_results_valid_ = false;
85 }
86 
87 // Check if a given language is within the top results.
CheckLanguageWithinTop(const std::string & lang)88 bool AXLanguageInfoStats::CheckLanguageWithinTop(const std::string& lang) {
89   if (!top_results_valid_) {
90     GenerateTopResults();
91   }
92 
93   for (const auto& item : top_results_) {
94     if (lang == item.second) {
95       return true;
96     }
97   }
98 
99   return false;
100 }
101 
GenerateTopResults()102 void AXLanguageInfoStats::GenerateTopResults() {
103   top_results_.clear();
104 
105   for (const auto& item : lang_counts_) {
106     top_results_.emplace_back(item.second, item.first);
107   }
108 
109   // Since we store the pair as (score, language) the default operator> on pairs
110   // does our sort appropriately.
111   // Sort in descending order.
112   std::sort(top_results_.begin(), top_results_.end(), std::greater<>());
113 
114   // Resize down to remove all values greater than the N we are considering.
115   // TODO(chrishall): In the event of a tie, we want to include more than N.
116   top_results_.resize(kMaxDetectedLanguagesPerPage);
117 
118   top_results_valid_ = true;
119 }
120 
RecordLabelStatistics(const std::string & labelled_lang,const std::string & author_lang,bool labelled_with_first_result)121 void AXLanguageInfoStats::RecordLabelStatistics(
122     const std::string& labelled_lang,
123     const std::string& author_lang,
124     bool labelled_with_first_result) {
125   // Count the number of nodes we labelled, and the number we labelled with
126   // our highest confidence result.
127   ++count_labelled_;
128 
129   if (labelled_with_first_result)
130     ++count_labelled_with_top_result_;
131 
132   // Record if we assigned a language that disagrees with the author
133   // provided language for that node.
134   if (author_lang != labelled_lang)
135     ++count_overridden_;
136 }
137 
RecordDetectionAttempt()138 void AXLanguageInfoStats::RecordDetectionAttempt() {
139   ++count_detection_attempted_;
140 }
141 
ReportMetrics()142 void AXLanguageInfoStats::ReportMetrics() {
143   // Only report statistics for pages which had detected results.
144   if (!count_detection_attempted_)
145     return;
146 
147   // 50 buckets exponentially covering the range from 1 to 1000.
148   base::UmaHistogramCustomCounts(
149       "Accessibility.LanguageDetection.CountDetectionAttempted",
150       count_detection_attempted_, 1, 1000, 50);
151 
152   int percentage_detected =
153       count_detection_results_ * 100 / count_detection_attempted_;
154   base::UmaHistogramPercentageObsoleteDoNotUse(
155       "Accessibility.LanguageDetection.PercentageLanguageDetected",
156       percentage_detected);
157 
158   // 50 buckets exponentially covering the range from 1 to 1000.
159   base::UmaHistogramCustomCounts(
160       "Accessibility.LanguageDetection.CountLabelled", count_labelled_, 1, 1000,
161       50);
162 
163   // If no nodes were labelled, then the percentage labelled with the top result
164   // doesn't make sense to report.
165   if (count_labelled_) {
166     int percentage_top =
167         count_labelled_with_top_result_ * 100 / count_labelled_;
168     base::UmaHistogramPercentageObsoleteDoNotUse(
169         "Accessibility.LanguageDetection.PercentageLabelledWithTop",
170         percentage_top);
171 
172     int percentage_overridden = count_overridden_ * 100 / count_labelled_;
173     base::UmaHistogramPercentageObsoleteDoNotUse(
174         "Accessibility.LanguageDetection.PercentageOverridden",
175         percentage_overridden);
176   }
177 
178   // Exact count from 0 to 15, overflow is then truncated to 15.
179   base::UmaHistogramExactLinear("Accessibility.LanguageDetection.LangsPerPage",
180                                 unique_top_lang_detected_.size(), 15);
181 
182   // TODO(chrishall): Consider adding timing metrics for performance, consider:
183   //  - detect step.
184   //  - label step.
185   //  - total initial static detection & label timing.
186   //  - total incremental dynamic detection & label timing.
187 
188   // Reset statistics for metrics.
189   ClearMetrics();
190 }
191 
ClearMetrics()192 void AXLanguageInfoStats::ClearMetrics() {
193   // Do not clear metrics if we are specifically testing metrics.
194   if (disable_metric_clearing_)
195     return;
196 
197   unique_top_lang_detected_.clear();
198   count_detection_attempted_ = 0;
199   count_detection_results_ = 0;
200   count_labelled_ = 0;
201   count_labelled_with_top_result_ = 0;
202   count_overridden_ = 0;
203 }
204 
AXLanguageDetectionManager(AXTree * tree)205 AXLanguageDetectionManager::AXLanguageDetectionManager(AXTree* tree)
206     : short_text_language_identifier_(kShortTextIdentifierMinByteLength,
207                                       kShortTextIdentifierMaxByteLength),
208       tree_(tree) {}
209 
210 AXLanguageDetectionManager::~AXLanguageDetectionManager() = default;
211 
IsStaticLanguageDetectionEnabled()212 bool AXLanguageDetectionManager::IsStaticLanguageDetectionEnabled() {
213   // Static language detection can be enabled by either:
214   //  1) The general language detection feature flag which gates both static and
215   //     dynamic language detection (feature flag for experiment), or
216   //  2) The Static specific flag (user controlled switch).
217   return features::IsAccessibilityLanguageDetectionEnabled() ||
218          ::switches::IsExperimentalAccessibilityLanguageDetectionEnabled();
219 }
220 
IsDynamicLanguageDetectionEnabled()221 bool AXLanguageDetectionManager::IsDynamicLanguageDetectionEnabled() {
222   // Dynamic language detection can be enabled by either:
223   //  1) The general language detection feature flag which gates both static and
224   //     dynamic language detection (feature flag for experiment), or
225   //  2) The Dynamic specific flag (user controlled switch).
226   return features::IsAccessibilityLanguageDetectionEnabled() ||
227          ::switches::
228              IsExperimentalAccessibilityLanguageDetectionDynamicEnabled();
229 }
230 
RegisterLanguageDetectionObserver()231 void AXLanguageDetectionManager::RegisterLanguageDetectionObserver() {
232   // Do not perform dynamic language detection unless explicitly enabled.
233   if (!IsDynamicLanguageDetectionEnabled()) {
234     return;
235   }
236 
237   // Construct our new Observer as requested.
238   // If there is already an Observer on this Manager then this will destroy it.
239   language_detection_observer_.reset(new AXLanguageDetectionObserver(tree_));
240 }
241 
242 // Detect languages for each node.
DetectLanguages()243 void AXLanguageDetectionManager::DetectLanguages() {
244   TRACE_EVENT0("accessibility", "AXLanguageInfo::DetectLanguages");
245 
246   if (!IsStaticLanguageDetectionEnabled()) {
247     return;
248   }
249 
250   DetectLanguagesForSubtree(tree_->root());
251 }
252 
253 // Detect languages for a subtree rooted at the given subtree_root.
254 // Will not check feature flag.
DetectLanguagesForSubtree(AXNode * subtree_root)255 void AXLanguageDetectionManager::DetectLanguagesForSubtree(
256     AXNode* subtree_root) {
257   // Only perform detection for kStaticText nodes.
258   //
259   // Do not visit the children of kStaticText nodes as they don't have
260   // interesting children for language detection.
261   //
262   // Since kInlineTextBox(es) contain text from their parent, any detection on
263   // them is redundant. Instead they can inherit the detected language.
264   if (subtree_root->data().role == ax::mojom::Role::kStaticText) {
265     DetectLanguagesForNode(subtree_root);
266   } else {
267     // Otherwise, recurse into children for detection.
268     for (AXNode* child : subtree_root->children()) {
269       DetectLanguagesForSubtree(child);
270     }
271   }
272 }
273 
274 // Detect languages for a single node.
275 // Will not descend into children.
276 // Will not check feature flag.
DetectLanguagesForNode(AXNode * node)277 void AXLanguageDetectionManager::DetectLanguagesForNode(AXNode* node) {
278   // Count this detection attempt.
279   lang_info_stats_.RecordDetectionAttempt();
280 
281   // TODO(chrishall): implement strategy for nodes which are too small to get
282   // reliable language detection results. Consider combination of
283   // concatenation and bubbling up results.
284   auto text = node->GetStringAttribute(ax::mojom::StringAttribute::kName);
285 
286   // FindTopNMostFreqLangs() will pad the results with
287   // |NNetLanguageIdentifier::kUnknown| in order to reach the requested number
288   // of languages, this means we cannot rely on the results' length and we
289   // have to filter the results.
290   const std::vector<Result> results =
291       language_identifier_.FindTopNMostFreqLangs(text,
292                                                  kMaxDetectedLanguagesPerSpan);
293 
294   std::vector<std::string> reliable_results;
295 
296   for (const auto& res : results) {
297     // The output of FindTopNMostFreqLangs() is already sorted by byte count,
298     // this seems good enough for now.
299     // Only consider results which are 'reliable', this will also remove
300     // 'unknown'.
301     if (res.is_reliable) {
302       reliable_results.push_back(res.language);
303     }
304   }
305 
306   // Only allocate a(n) LanguageInfo if we have results worth keeping.
307   if (reliable_results.size()) {
308     AXLanguageInfo* lang_info = node->GetLanguageInfo();
309     if (lang_info) {
310       // Clear previously detected and labelled languages.
311       lang_info->detected_languages.clear();
312       lang_info->language.clear();
313     } else {
314       node->SetLanguageInfo(std::make_unique<AXLanguageInfo>());
315       lang_info = node->GetLanguageInfo();
316     }
317 
318     // Keep these results.
319     lang_info->detected_languages = std::move(reliable_results);
320 
321     // Update statistics to take these results into account.
322     lang_info_stats_.Add(lang_info->detected_languages);
323   }
324 }
325 
326 // Label languages for each node. This relies on DetectLanguages having already
327 // been run.
LabelLanguages()328 void AXLanguageDetectionManager::LabelLanguages() {
329   TRACE_EVENT0("accessibility", "AXLanguageInfo::LabelLanguages");
330 
331   if (!IsStaticLanguageDetectionEnabled()) {
332     return;
333   }
334 
335   LabelLanguagesForSubtree(tree_->root());
336 
337   // TODO(chrishall): consider refactoring to have a more clearly named entry
338   // point for static language detection.
339   //
340   // LabelLanguages is only called for the initial run of language detection for
341   // static content, this call to ReportMetrics therefore covers only the work
342   // we performed in response to a page load complete event.
343   lang_info_stats_.ReportMetrics();
344 }
345 
346 // Label languages for each node in the subtree rooted at the given
347 // subtree_root. Will not check feature flag.
LabelLanguagesForSubtree(AXNode * subtree_root)348 void AXLanguageDetectionManager::LabelLanguagesForSubtree(
349     AXNode* subtree_root) {
350   LabelLanguagesForNode(subtree_root);
351 
352   // Recurse into children to continue labelling.
353   for (AXNode* child : subtree_root->children()) {
354     LabelLanguagesForSubtree(child);
355   }
356 }
357 
358 // Label languages for a single node.
359 // Will not descend into children.
360 // Will not check feature flag.
LabelLanguagesForNode(AXNode * node)361 void AXLanguageDetectionManager::LabelLanguagesForNode(AXNode* node) {
362   AXLanguageInfo* lang_info = node->GetLanguageInfo();
363   if (!lang_info)
364     return;
365 
366   // There is no work to do if we already have an assigned (non-empty) language.
367   if (lang_info->language.size())
368     return;
369 
370   // Assign the highest probability language which is both:
371   // 1) reliably detected for this node, and
372   // 2) one of the top (kMaxDetectedLanguagesPerPage) languages on this page.
373   //
374   // This helps guard against false positives for nodes which have noisy
375   // language detection results in isolation.
376   //
377   // Note that we assign a language even if it is the same as the author's
378   // annotation. This may not be needed in practice. In theory this would help
379   // if the author later on changed the language annotation to be incorrect, but
380   // this seems unlikely to occur in practice.
381   //
382   // TODO(chrishall): consider optimisation: only assign language if it
383   // disagrees with author's language annotation.
384   bool labelled_with_first_result = true;
385   for (const auto& lang : lang_info->detected_languages) {
386     if (lang_info_stats_.CheckLanguageWithinTop(lang)) {
387       lang_info->language = lang;
388 
389       const std::string& author_lang = node->GetInheritedStringAttribute(
390           ax::mojom::StringAttribute::kLanguage);
391       lang_info_stats_.RecordLabelStatistics(lang, author_lang,
392                                              labelled_with_first_result);
393 
394       // After assigning a label we no longer need detected languages.
395       // NB: clearing this invalidates the reference `lang`, so we must do this
396       // last and then immediately return.
397       lang_info->detected_languages.clear();
398 
399       return;
400     }
401     labelled_with_first_result = false;
402   }
403 
404   // If we didn't label a language, then we can discard all language detection
405   // information for this node.
406   node->ClearLanguageInfo();
407 }
408 
409 std::vector<AXLanguageSpan>
GetLanguageAnnotationForStringAttribute(const AXNode & node,ax::mojom::StringAttribute attr)410 AXLanguageDetectionManager::GetLanguageAnnotationForStringAttribute(
411     const AXNode& node,
412     ax::mojom::StringAttribute attr) {
413   std::vector<AXLanguageSpan> language_annotation;
414   if (!node.HasStringAttribute(attr))
415     return language_annotation;
416 
417   std::string attr_value = node.GetStringAttribute(attr);
418 
419   // Use author-provided language if present.
420   if (node.HasStringAttribute(ax::mojom::StringAttribute::kLanguage)) {
421     // Use author-provided language if present.
422     language_annotation.push_back(AXLanguageSpan{
423         0 /* start_index */, attr_value.length() /* end_index */,
424         node.GetStringAttribute(
425             ax::mojom::StringAttribute::kLanguage) /* language */,
426         1 /* probability */});
427     return language_annotation;
428   }
429   // Calculate top 3 languages.
430   // TODO(akihiroota): What's a reasonable number of languages to have
431   // cld_3 find? Should vary.
432   std::vector<Result> top_languages =
433       short_text_language_identifier_.FindTopNMostFreqLangs(
434           attr_value, kMaxDetectedLanguagesPerPage);
435   // Create vector of AXLanguageSpans.
436   for (const auto& result : top_languages) {
437     const std::vector<SpanInfo>& ranges = result.byte_ranges;
438     for (const auto& span_info : ranges) {
439       language_annotation.push_back(
440           AXLanguageSpan{span_info.start_index, span_info.end_index,
441                          result.language, span_info.probability});
442     }
443   }
444   // Sort Language Annotations by increasing start index. LanguageAnnotations
445   // with lower start index should appear earlier in the vector.
446   std::sort(
447       language_annotation.begin(), language_annotation.end(),
448       [](const AXLanguageSpan& left, const AXLanguageSpan& right) -> bool {
449         return left.start_index <= right.start_index;
450       });
451   // Ensure that AXLanguageSpans do not overlap.
452   for (size_t i = 0; i < language_annotation.size(); ++i) {
453     if (i > 0) {
454       DCHECK(language_annotation[i].start_index <=
455              language_annotation[i - 1].end_index);
456     }
457   }
458   return language_annotation;
459 }
460 
AXLanguageDetectionObserver(AXTree * tree)461 AXLanguageDetectionObserver::AXLanguageDetectionObserver(AXTree* tree)
462     : tree_(tree) {
463   // We expect the feature flag to have be checked before this Observer is
464   // constructed, this should have been checked by
465   // RegisterLanguageDetectionObserver.
466   DCHECK(AXLanguageDetectionManager::IsDynamicLanguageDetectionEnabled());
467 
468   tree_->AddObserver(this);
469 }
470 
~AXLanguageDetectionObserver()471 AXLanguageDetectionObserver::~AXLanguageDetectionObserver() {
472   tree_->RemoveObserver(this);
473 }
474 
OnAtomicUpdateFinished(ui::AXTree * tree,bool root_changed,const std::vector<Change> & changes)475 void AXLanguageDetectionObserver::OnAtomicUpdateFinished(
476     ui::AXTree* tree,
477     bool root_changed,
478     const std::vector<Change>& changes) {
479   // TODO(chrishall): We likely want to re-consider updating or resetting
480   // AXLanguageInfoStats over time to better support detection on long running
481   // pages.
482 
483   // TODO(chrishall): To support pruning deleted node data from stats we should
484   // consider implementing OnNodeWillBeDeleted. Other options available include:
485   // 1) move lang info from AXNode into a map on AXTree so that we can fetch
486   //    based on id in here
487   // 2) AXLanguageInfo destructor could remove itself
488 
489   // TODO(chrishall): Possible optimisation: only run detect/label for certain
490   // change.type(s)), at least NODE_CREATED, NODE_CHANGED, and SUBTREE_CREATED.
491 
492   DCHECK(tree->language_detection_manager);
493 
494   // Perform Detect and Label for each node changed or created.
495   // We currently only consider nodes with a role of kStaticText for detection.
496   //
497   // Note that language inheritance is now handled by AXNode::GetLanguage.
498   //
499   // Note that since Label no longer handles language inheritance, we only need
500   // to call Label and Detect on the nodes that changed and don't need to
501   // recurse.
502   //
503   // We do this in two passes because Detect updates page level statistics which
504   // are later used by Label in order to make more accurate decisions.
505 
506   for (auto& change : changes) {
507     if (change.node->data().role == ax::mojom::Role::kStaticText) {
508       tree->language_detection_manager->DetectLanguagesForNode(change.node);
509     }
510   }
511 
512   for (auto& change : changes) {
513     if (change.node->data().role == ax::mojom::Role::kStaticText) {
514       tree->language_detection_manager->LabelLanguagesForNode(change.node);
515     }
516   }
517 
518   // OnAtomicUpdateFinished is used for dynamic language detection, this call to
519   // ReportMetrics covers only the work we have performed in response to one
520   // update to the AXTree.
521   tree->language_detection_manager->lang_info_stats_.ReportMetrics();
522 }
523 
524 }  // namespace ui
525