1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "pdf/pdfium/pdfium_page.h"
6 
7 #include <math.h>
8 #include <stddef.h>
9 
10 #include <algorithm>
11 #include <memory>
12 #include <utility>
13 
14 #include "base/logging.h"
15 #include "base/numerics/math_constants.h"
16 #include "base/numerics/safe_math.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
21 #include "pdf/pdfium/pdfium_engine.h"
22 #include "pdf/pdfium/pdfium_unsupported_features.h"
23 #include "ppapi/c/private/ppb_pdf.h"
24 #include "printing/units.h"
25 #include "third_party/pdfium/public/cpp/fpdf_scopers.h"
26 #include "third_party/pdfium/public/fpdf_annot.h"
27 #include "third_party/pdfium/public/fpdf_catalog.h"
28 
29 using printing::ConvertUnitDouble;
30 using printing::kPointsPerInch;
31 using printing::kPixelsPerInch;
32 
33 namespace chrome_pdf {
34 
35 namespace {
36 
37 constexpr float k45DegreesInRadians = base::kPiFloat / 4;
38 constexpr float k90DegreesInRadians = base::kPiFloat / 2;
39 constexpr float k180DegreesInRadians = base::kPiFloat;
40 constexpr float k270DegreesInRadians = 3 * base::kPiFloat / 2;
41 constexpr float k360DegreesInRadians = 2 * base::kPiFloat;
42 
43 PDFiumPage::IsValidLinkFunction g_is_valid_link_func_for_testing = nullptr;
44 
45 // If the link cannot be converted to a pp::Var, then it is not possible to
46 // pass it to JS. In this case, ignore the link like other PDF viewers.
47 // See https://crbug.com/312882 for an example.
48 // TODO(crbug.com/702993): Get rid of the PPAPI usage here, as well as
49 // SetIsValidLinkFunctionForTesting() and related code.
IsValidLink(const std::string & url)50 bool IsValidLink(const std::string& url) {
51   return pp::Var(url).is_string();
52 }
53 
FloatPageRectToPixelRect(FPDF_PAGE page,const pp::FloatRect & input)54 pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page,
55                                        const pp::FloatRect& input) {
56   int output_width = FPDF_GetPageWidth(page);
57   int output_height = FPDF_GetPageHeight(page);
58 
59   int min_x;
60   int min_y;
61   int max_x;
62   int max_y;
63   FPDF_BOOL ret = FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0,
64                                     input.x(), input.y(), &min_x, &min_y);
65   DCHECK(ret);
66   ret = FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0,
67                           input.right(), input.bottom(), &max_x, &max_y);
68   DCHECK(ret);
69 
70   if (max_x < min_x)
71     std::swap(min_x, max_x);
72   if (max_y < min_y)
73     std::swap(min_y, max_y);
74 
75   pp::FloatRect output_rect(
76       ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch),
77       ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch),
78       ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch),
79       ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch));
80   return output_rect;
81 }
82 
GetFloatCharRectInPixels(FPDF_PAGE page,FPDF_TEXTPAGE text_page,int index)83 pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page,
84                                        FPDF_TEXTPAGE text_page,
85                                        int index) {
86   double left;
87   double right;
88   double bottom;
89   double top;
90   if (!FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top))
91     return pp::FloatRect();
92 
93   if (right < left)
94     std::swap(left, right);
95   if (bottom < top)
96     std::swap(top, bottom);
97   pp::FloatRect page_coords(left, top, right - left, bottom - top);
98   return FloatPageRectToPixelRect(page, page_coords);
99 }
100 
GetFirstNonUnicodeWhiteSpaceCharIndex(FPDF_TEXTPAGE text_page,int start_char_index,int chars_count)101 int GetFirstNonUnicodeWhiteSpaceCharIndex(FPDF_TEXTPAGE text_page,
102                                           int start_char_index,
103                                           int chars_count) {
104   int i = start_char_index;
105   while (i < chars_count &&
106          base::IsUnicodeWhitespace(FPDFText_GetUnicode(text_page, i))) {
107     i++;
108   }
109   return i;
110 }
111 
GetDirectionFromAngle(float angle)112 PP_PrivateDirection GetDirectionFromAngle(float angle) {
113   // Rotating the angle by 45 degrees to simplify the conditions statements.
114   // It's like if we rotated the whole cartesian coordinate system like below.
115   //   X                   X
116   //     X      IV       X
117   //       X           X
118   //         X       X
119   //           X   X
120   //   III       X       I
121   //           X   X
122   //         X       X
123   //       X           X
124   //     X      II       X
125   //   X                   X
126 
127   angle = fmodf(angle + k45DegreesInRadians, k360DegreesInRadians);
128   // Quadrant I.
129   if (angle >= 0 && angle <= k90DegreesInRadians)
130     return PP_PRIVATEDIRECTION_LTR;
131   // Quadrant II.
132   if (angle > k90DegreesInRadians && angle <= k180DegreesInRadians)
133     return PP_PRIVATEDIRECTION_TTB;
134   // Quadrant III.
135   if (angle > k180DegreesInRadians && angle <= k270DegreesInRadians)
136     return PP_PRIVATEDIRECTION_RTL;
137   // Quadrant IV.
138   return PP_PRIVATEDIRECTION_BTT;
139 }
140 
GetDistanceBetweenPoints(const pp::FloatPoint & p1,const pp::FloatPoint & p2)141 float GetDistanceBetweenPoints(const pp::FloatPoint& p1,
142                                const pp::FloatPoint& p2) {
143   pp::FloatPoint dist_vector = p1 - p2;
144   return sqrtf(powf(dist_vector.x(), 2) + powf(dist_vector.y(), 2));
145 }
146 
AddCharSizeToAverageCharSize(pp::FloatSize new_size,pp::FloatSize * avg_size,int * count)147 void AddCharSizeToAverageCharSize(pp::FloatSize new_size,
148                                   pp::FloatSize* avg_size,
149                                   int* count) {
150   // Some characters sometimes have a bogus empty bounding box. We don't want
151   // them to impact the average.
152   if (!new_size.IsEmpty()) {
153     avg_size->set_width((avg_size->width() * *count + new_size.width()) /
154                         (*count + 1));
155     avg_size->set_height((avg_size->height() * *count + new_size.height()) /
156                          (*count + 1));
157     (*count)++;
158   }
159 }
160 
GetRotatedCharWidth(float angle,const pp::FloatSize & size)161 float GetRotatedCharWidth(float angle, const pp::FloatSize& size) {
162   return fabsf(cosf(angle) * size.width()) + fabsf(sinf(angle) * size.height());
163 }
164 
GetAngleOfVector(const pp::FloatPoint & v)165 float GetAngleOfVector(const pp::FloatPoint& v) {
166   float angle = atan2f(v.y(), v.x());
167   if (angle < 0)
168     angle += k360DegreesInRadians;
169   return angle;
170 }
171 
GetAngleDifference(float a,float b)172 float GetAngleDifference(float a, float b) {
173   // This is either the difference or (360 - difference).
174   float x = fmodf(fabsf(b - a), k360DegreesInRadians);
175   return x > k180DegreesInRadians ? k360DegreesInRadians - x : x;
176 }
177 
// Approximate float equality: true when |f1| and |f2| differ by less than a
// small fraction of the larger of their magnitudes.
bool FloatEquals(float f1, float f2) {
  // The idea behind this is to use this fraction of the larger of the
  // two numbers as the limit of the difference.  This breaks down near
  // zero, so we reuse this as the minimum absolute size we will use
  // for the base of the scale too.
  static constexpr float kEpsilonScale = 0.00001f;
  const float largest_magnitude = fmaxf(fabsf(f1), fabsf(f2));
  const float tolerance =
      kEpsilonScale * fmaxf(largest_magnitude, kEpsilonScale);
  const float difference = fabsf(f1 - f2);
  return difference < tolerance;
}
187 
188 using GetFormFieldPropertyFunction =
189     base::RepeatingCallback<unsigned long(unsigned short* buffer,
190                                           unsigned long buflen)>;
191 
192 // Helper method to fetch string properties of form fields.
GetFormFieldProperty(GetFormFieldPropertyFunction function)193 std::string GetFormFieldProperty(GetFormFieldPropertyFunction function) {
194   base::string16 data;
195   size_t buffer_size = function.Run(nullptr, 0);
196   if (buffer_size > 0) {
197     PDFiumAPIStringBufferSizeInBytesAdapter<base::string16> api_string_adapter(
198         &data, buffer_size, true);
199     api_string_adapter.Close(function.Run(
200         reinterpret_cast<unsigned short*>(api_string_adapter.GetData()),
201         buffer_size));
202   }
203   return base::UTF16ToUTF8(data);
204 }
205 
206 }  // namespace
207 
LinkTarget()208 PDFiumPage::LinkTarget::LinkTarget() : page(-1) {}
209 
210 PDFiumPage::LinkTarget::LinkTarget(const LinkTarget& other) = default;
211 
212 PDFiumPage::LinkTarget::~LinkTarget() = default;
213 
PDFiumPage(PDFiumEngine * engine,int i)214 PDFiumPage::PDFiumPage(PDFiumEngine* engine, int i)
215     : engine_(engine), index_(i), available_(false) {}
216 
217 PDFiumPage::PDFiumPage(PDFiumPage&& that) = default;
218 
~PDFiumPage()219 PDFiumPage::~PDFiumPage() {
220   DCHECK_EQ(0, preventing_unload_count_);
221 }
222 
223 // static
SetIsValidLinkFunctionForTesting(IsValidLinkFunction function)224 void PDFiumPage::SetIsValidLinkFunctionForTesting(
225     IsValidLinkFunction function) {
226   g_is_valid_link_func_for_testing = function;
227 }
228 
Unload()229 void PDFiumPage::Unload() {
230   // Do not unload while in the middle of a load.
231   if (preventing_unload_count_)
232     return;
233 
234   text_page_.reset();
235 
236   if (page_) {
237     if (engine_->form()) {
238       FORM_OnBeforeClosePage(page(), engine_->form());
239     }
240     page_.reset();
241   }
242 }
243 
GetPage()244 FPDF_PAGE PDFiumPage::GetPage() {
245   ScopedUnsupportedFeature scoped_unsupported_feature(engine_);
246   if (!available_)
247     return nullptr;
248   if (!page_) {
249     ScopedUnloadPreventer scoped_unload_preventer(this);
250     page_.reset(FPDF_LoadPage(engine_->doc(), index_));
251     if (page_ && engine_->form()) {
252       FORM_OnAfterLoadPage(page(), engine_->form());
253     }
254   }
255   return page();
256 }
257 
GetTextPage()258 FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
259   if (!available_)
260     return nullptr;
261   if (!text_page_) {
262     ScopedUnloadPreventer scoped_unload_preventer(this);
263     text_page_.reset(FPDFText_LoadPage(GetPage()));
264   }
265   return text_page();
266 }
267 
CalculatePageObjectTextRunBreaks()268 void PDFiumPage::CalculatePageObjectTextRunBreaks() {
269   if (calculated_page_object_text_run_breaks_)
270     return;
271 
272   calculated_page_object_text_run_breaks_ = true;
273   int chars_count = FPDFText_CountChars(GetTextPage());
274   if (chars_count == 0)
275     return;
276 
277   CalculateLinks();
278   for (const auto& link : links_) {
279     if (link.start_char_index >= 0 && link.start_char_index < chars_count) {
280       page_object_text_run_breaks_.insert(link.start_char_index);
281       int next_text_run_break_index = link.start_char_index + link.char_count;
282       // Don't insert a break if the link is at the end of the page text.
283       if (next_text_run_break_index < chars_count) {
284         page_object_text_run_breaks_.insert(next_text_run_break_index);
285       }
286     }
287   }
288 
289   PopulateAnnotations();
290   for (const auto& highlight : highlights_) {
291     if (highlight.start_char_index >= 0 &&
292         highlight.start_char_index < chars_count) {
293       page_object_text_run_breaks_.insert(highlight.start_char_index);
294       int next_text_run_break_index =
295           highlight.start_char_index + highlight.char_count;
296       // Don't insert a break if the highlight is at the end of the page text.
297       if (next_text_run_break_index < chars_count) {
298         page_object_text_run_breaks_.insert(next_text_run_break_index);
299       }
300     }
301   }
302 }
303 
CalculateTextRunStyleInfo(int char_index,pp::PDF::PrivateAccessibilityTextStyleInfo * style_info)304 void PDFiumPage::CalculateTextRunStyleInfo(
305     int char_index,
306     pp::PDF::PrivateAccessibilityTextStyleInfo* style_info) {
307   FPDF_TEXTPAGE text_page = GetTextPage();
308   style_info->font_size = FPDFText_GetFontSize(text_page, char_index);
309 
310   int flags = 0;
311   size_t buffer_size =
312       FPDFText_GetFontInfo(text_page, char_index, nullptr, 0, &flags);
313   if (buffer_size > 0) {
314     PDFiumAPIStringBufferAdapter<std::string> api_string_adapter(
315         &style_info->font_name, buffer_size, true);
316     void* data = api_string_adapter.GetData();
317     size_t bytes_written =
318         FPDFText_GetFontInfo(text_page, char_index, data, buffer_size, nullptr);
319     // Trim the null character.
320     api_string_adapter.Close(bytes_written);
321   }
322 
323   style_info->font_weight = FPDFText_GetFontWeight(text_page, char_index);
324   // As defined in PDF 1.7 table 5.20.
325   constexpr int kFlagItalic = (1 << 6);
326   // Bold text is considered bold when greater than or equal to 700.
327   constexpr int kStandardBoldValue = 700;
328   style_info->is_italic = (flags & kFlagItalic);
329   style_info->is_bold = style_info->font_weight >= kStandardBoldValue;
330   unsigned int fill_r;
331   unsigned int fill_g;
332   unsigned int fill_b;
333   unsigned int fill_a;
334   if (FPDFText_GetFillColor(text_page, char_index, &fill_r, &fill_g, &fill_b,
335                             &fill_a)) {
336     style_info->fill_color = MakeARGB(fill_a, fill_r, fill_g, fill_b);
337   } else {
338     style_info->fill_color = MakeARGB(0xff, 0, 0, 0);
339   }
340 
341   unsigned int stroke_r;
342   unsigned int stroke_g;
343   unsigned int stroke_b;
344   unsigned int stroke_a;
345   if (FPDFText_GetStrokeColor(text_page, char_index, &stroke_r, &stroke_g,
346                               &stroke_b, &stroke_a)) {
347     style_info->stroke_color = MakeARGB(stroke_a, stroke_r, stroke_g, stroke_b);
348   } else {
349     style_info->stroke_color = MakeARGB(0xff, 0, 0, 0);
350   }
351 
352   int render_mode = FPDFText_GetTextRenderMode(text_page, char_index);
353   if (render_mode < 0 || render_mode > PP_TEXTRENDERINGMODE_LAST) {
354     style_info->render_mode = PP_TEXTRENDERINGMODE_UNKNOWN;
355   } else {
356     style_info->render_mode = static_cast<PP_TextRenderingMode>(render_mode);
357   }
358 }
359 
AreTextStyleEqual(int char_index,const pp::PDF::PrivateAccessibilityTextStyleInfo & style)360 bool PDFiumPage::AreTextStyleEqual(
361     int char_index,
362     const pp::PDF::PrivateAccessibilityTextStyleInfo& style) {
363   pp::PDF::PrivateAccessibilityTextStyleInfo char_style;
364   CalculateTextRunStyleInfo(char_index, &char_style);
365   return char_style.font_name == style.font_name &&
366          char_style.font_weight == style.font_weight &&
367          char_style.render_mode == style.render_mode &&
368          FloatEquals(char_style.font_size, style.font_size) &&
369          char_style.fill_color == style.fill_color &&
370          char_style.stroke_color == style.stroke_color &&
371          char_style.is_italic == style.is_italic &&
372          char_style.is_bold == style.is_bold;
373 }
374 
375 base::Optional<pp::PDF::PrivateAccessibilityTextRunInfo>
GetTextRunInfo(int start_char_index)376 PDFiumPage::GetTextRunInfo(int start_char_index) {
377   FPDF_PAGE page = GetPage();
378   FPDF_TEXTPAGE text_page = GetTextPage();
379   int chars_count = FPDFText_CountChars(text_page);
380   // Check to make sure |start_char_index| is within bounds.
381   if (start_char_index < 0 || start_char_index >= chars_count)
382     return base::nullopt;
383 
384   int actual_start_char_index = GetFirstNonUnicodeWhiteSpaceCharIndex(
385       text_page, start_char_index, chars_count);
386   // Check to see if GetFirstNonUnicodeWhiteSpaceCharIndex() iterated through
387   // all the characters.
388   if (actual_start_char_index >= chars_count) {
389     // If so, |info.len| needs to take the number of characters
390     // iterated into account.
391     DCHECK_GT(actual_start_char_index, start_char_index);
392     pp::PDF::PrivateAccessibilityTextRunInfo info;
393     info.len = chars_count - start_char_index;
394     info.bounds = pp::FloatRect();
395     info.direction = PP_PRIVATEDIRECTION_NONE;
396     return info;
397   }
398   int char_index = actual_start_char_index;
399 
400   // Set text run's style info from the first character of the text run.
401   pp::PDF::PrivateAccessibilityTextRunInfo info;
402   CalculateTextRunStyleInfo(char_index, &info.style);
403 
404   pp::FloatRect start_char_rect =
405       GetFloatCharRectInPixels(page, text_page, char_index);
406   float text_run_font_size = info.style.font_size;
407 
408   // Heuristic: Initialize the average character size to one-third of the font
409   // size to avoid having the first few characters misrepresent the average.
410   // Without it, if a text run starts with a '.', its small bounding box could
411   // lead to a break in the text run after only one space. Ex: ". Hello World"
412   // would be split in two runs: "." and "Hello World".
413   double font_size_minimum = FPDFText_GetFontSize(text_page, char_index) / 3.0;
414   pp::FloatSize avg_char_size =
415       pp::FloatSize(font_size_minimum, font_size_minimum);
416   int non_whitespace_chars_count = 1;
417   AddCharSizeToAverageCharSize(start_char_rect.Floatsize(), &avg_char_size,
418                                &non_whitespace_chars_count);
419 
420   // Add first char to text run.
421   pp::FloatRect text_run_bounds = start_char_rect;
422   PP_PrivateDirection char_direction =
423       GetDirectionFromAngle(FPDFText_GetCharAngle(text_page, char_index));
424   if (char_index < chars_count)
425     char_index++;
426 
427   pp::FloatRect prev_char_rect = start_char_rect;
428   float estimated_font_size =
429       std::max(start_char_rect.width(), start_char_rect.height());
430 
431   // The angle of the vector starting at the first character center-point and
432   // ending at the current last character center-point.
433   float text_run_angle = 0;
434 
435   CalculatePageObjectTextRunBreaks();
436   const auto breakpoint_iter =
437       std::lower_bound(page_object_text_run_breaks_.begin(),
438                        page_object_text_run_breaks_.end(), char_index);
439   int breakpoint_index = breakpoint_iter != page_object_text_run_breaks_.end()
440                              ? *breakpoint_iter
441                              : -1;
442 
443   // Continue adding characters until heuristics indicate we should end the text
444   // run.
445   while (char_index < chars_count) {
446     // Split a text run when it encounters a page object like links or images.
447     if (char_index == breakpoint_index)
448       break;
449 
450     unsigned int character = FPDFText_GetUnicode(text_page, char_index);
451     pp::FloatRect char_rect =
452         GetFloatCharRectInPixels(page, text_page, char_index);
453 
454     if (!base::IsUnicodeWhitespace(character)) {
455       // Heuristic: End the text run if the text style of the current character
456       // is different from the text run's style.
457       if (!AreTextStyleEqual(char_index, info.style))
458         break;
459 
460       // Heuristic: End text run if character isn't going in the same direction.
461       if (char_direction !=
462           GetDirectionFromAngle(FPDFText_GetCharAngle(text_page, char_index)))
463         break;
464 
465       // Heuristic: End the text run if the difference between the text run
466       // angle and the angle between the center-points of the previous and
467       // current characters is greater than 90 degrees.
468       float current_angle = GetAngleOfVector(char_rect.CenterPoint() -
469                                              prev_char_rect.CenterPoint());
470       if (start_char_rect != prev_char_rect) {
471         text_run_angle = GetAngleOfVector(prev_char_rect.CenterPoint() -
472                                           start_char_rect.CenterPoint());
473 
474         if (GetAngleDifference(text_run_angle, current_angle) >
475             k90DegreesInRadians) {
476           break;
477         }
478       }
479 
480       // Heuristic: End the text run if the center-point distance to the
481       // previous character is less than 2.5x the average character size.
482       AddCharSizeToAverageCharSize(char_rect.Floatsize(), &avg_char_size,
483                                    &non_whitespace_chars_count);
484 
485       float avg_char_width = GetRotatedCharWidth(current_angle, avg_char_size);
486 
487       float distance =
488           GetDistanceBetweenPoints(char_rect.CenterPoint(),
489                                    prev_char_rect.CenterPoint()) -
490           GetRotatedCharWidth(current_angle, char_rect.Floatsize()) / 2 -
491           GetRotatedCharWidth(current_angle, prev_char_rect.Floatsize()) / 2;
492 
493       if (distance > 2.5f * avg_char_width)
494         break;
495 
496       text_run_bounds = text_run_bounds.Union(char_rect);
497       prev_char_rect = char_rect;
498     }
499 
500     if (!char_rect.IsEmpty()) {
501       // Update the estimated font size if needed.
502       float char_largest_side = std::max(char_rect.height(), char_rect.width());
503       estimated_font_size = std::max(char_largest_side, estimated_font_size);
504     }
505 
506     char_index++;
507   }
508 
509   // Some PDFs have missing or obviously bogus font sizes; substitute the
510   // font size by the width or height (whichever's the largest) of the bigger
511   // character in the current text run.
512   if (text_run_font_size <= 1 || text_run_font_size < estimated_font_size / 2 ||
513       text_run_font_size > estimated_font_size * 2) {
514     text_run_font_size = estimated_font_size;
515   }
516 
517   info.len = char_index - start_char_index;
518   info.style.font_size = text_run_font_size;
519   info.bounds = text_run_bounds;
520   // Infer text direction from first and last character of the text run. We
521   // can't base our decision on the character direction, since a character of a
522   // RTL language will have an angle of 0 when not rotated, just like a
523   // character in a LTR language.
524   info.direction = char_index - actual_start_char_index > 1
525                        ? GetDirectionFromAngle(text_run_angle)
526                        : PP_PRIVATEDIRECTION_NONE;
527   return info;
528 }
529 
GetCharUnicode(int char_index)530 uint32_t PDFiumPage::GetCharUnicode(int char_index) {
531   FPDF_TEXTPAGE text_page = GetTextPage();
532   return FPDFText_GetUnicode(text_page, char_index);
533 }
534 
GetCharBounds(int char_index)535 pp::FloatRect PDFiumPage::GetCharBounds(int char_index) {
536   FPDF_PAGE page = GetPage();
537   FPDF_TEXTPAGE text_page = GetTextPage();
538   return GetFloatCharRectInPixels(page, text_page, char_index);
539 }
540 
GetLinkInfo()541 std::vector<PDFEngine::AccessibilityLinkInfo> PDFiumPage::GetLinkInfo() {
542   std::vector<PDFEngine::AccessibilityLinkInfo> link_info;
543   if (!available_)
544     return link_info;
545 
546   CalculateLinks();
547 
548   link_info.reserve(links_.size());
549   for (const Link& link : links_) {
550     PDFEngine::AccessibilityLinkInfo cur_info;
551     cur_info.url = link.target.url;
552     cur_info.start_char_index = link.start_char_index;
553     cur_info.char_count = link.char_count;
554 
555     pp::Rect link_rect;
556     for (const auto& rect : link.bounding_rects)
557       link_rect = link_rect.Union(rect);
558     cur_info.bounds = pp::FloatRect(link_rect.x(), link_rect.y(),
559                                     link_rect.width(), link_rect.height());
560 
561     link_info.push_back(std::move(cur_info));
562   }
563   return link_info;
564 }
565 
GetImageInfo()566 std::vector<PDFEngine::AccessibilityImageInfo> PDFiumPage::GetImageInfo() {
567   std::vector<PDFEngine::AccessibilityImageInfo> image_info;
568   if (!available_)
569     return image_info;
570 
571   CalculateImages();
572 
573   image_info.reserve(images_.size());
574   for (const Image& image : images_) {
575     PDFEngine::AccessibilityImageInfo cur_info;
576     cur_info.alt_text = image.alt_text;
577     cur_info.bounds = pp::FloatRect(
578         image.bounding_rect.x(), image.bounding_rect.y(),
579         image.bounding_rect.width(), image.bounding_rect.height());
580     image_info.push_back(std::move(cur_info));
581   }
582   return image_info;
583 }
584 
585 std::vector<PDFEngine::AccessibilityHighlightInfo>
GetHighlightInfo()586 PDFiumPage::GetHighlightInfo() {
587   std::vector<PDFEngine::AccessibilityHighlightInfo> highlight_info;
588   if (!available_)
589     return highlight_info;
590 
591   PopulateAnnotations();
592 
593   highlight_info.reserve(highlights_.size());
594   for (const Highlight& highlight : highlights_) {
595     PDFEngine::AccessibilityHighlightInfo cur_info;
596     cur_info.start_char_index = highlight.start_char_index;
597     cur_info.char_count = highlight.char_count;
598     cur_info.bounds = pp::FloatRect(
599         highlight.bounding_rect.x(), highlight.bounding_rect.y(),
600         highlight.bounding_rect.width(), highlight.bounding_rect.height());
601     cur_info.color = highlight.color;
602     highlight_info.push_back(std::move(cur_info));
603   }
604   return highlight_info;
605 }
606 
607 std::vector<PDFEngine::AccessibilityTextFieldInfo>
GetTextFieldInfo()608 PDFiumPage::GetTextFieldInfo() {
609   std::vector<PDFEngine::AccessibilityTextFieldInfo> text_field_info;
610   if (!available_)
611     return text_field_info;
612 
613   PopulateAnnotations();
614 
615   text_field_info.reserve(text_fields_.size());
616   for (const TextField& text_field : text_fields_) {
617     PDFEngine::AccessibilityTextFieldInfo cur_info;
618     cur_info.name = text_field.name;
619     cur_info.value = text_field.value;
620     cur_info.is_read_only = !!(text_field.flags & FPDF_FORMFLAG_READONLY);
621     cur_info.is_required = !!(text_field.flags & FPDF_FORMFLAG_REQUIRED);
622     cur_info.is_password = !!(text_field.flags & FPDF_FORMFLAG_TEXT_PASSWORD);
623     cur_info.bounds = pp::FloatRect(
624         text_field.bounding_rect.x(), text_field.bounding_rect.y(),
625         text_field.bounding_rect.width(), text_field.bounding_rect.height());
626     text_field_info.push_back(std::move(cur_info));
627   }
628   return text_field_info;
629 }
630 
GetLinkTargetAtIndex(int link_index,LinkTarget * target)631 PDFiumPage::Area PDFiumPage::GetLinkTargetAtIndex(int link_index,
632                                                   LinkTarget* target) {
633   if (!available_ || link_index < 0)
634     return NONSELECTABLE_AREA;
635   CalculateLinks();
636   if (link_index >= static_cast<int>(links_.size()))
637     return NONSELECTABLE_AREA;
638   *target = links_[link_index].target;
639   return target->url.empty() ? DOCLINK_AREA : WEBLINK_AREA;
640 }
641 
GetCharIndex(const pp::Point & point,PageOrientation orientation,int * char_index,int * form_type,LinkTarget * target)642 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point,
643                                           PageOrientation orientation,
644                                           int* char_index,
645                                           int* form_type,
646                                           LinkTarget* target) {
647   if (!available_)
648     return NONSELECTABLE_AREA;
649   pp::Point point2 = point - rect_.point();
650   double new_x;
651   double new_y;
652   FPDF_BOOL ret = FPDF_DeviceToPage(
653       GetPage(), 0, 0, rect_.width(), rect_.height(),
654       ToPDFiumRotation(orientation), point2.x(), point2.y(), &new_x, &new_y);
655   DCHECK(ret);
656 
657   // hit detection tolerance, in points.
658   constexpr double kTolerance = 20.0;
659   int rv = FPDFText_GetCharIndexAtPos(GetTextPage(), new_x, new_y, kTolerance,
660                                       kTolerance);
661   *char_index = rv;
662 
663   FPDF_LINK link = FPDFLink_GetLinkAtPoint(GetPage(), new_x, new_y);
664   int control =
665       FPDFPage_HasFormFieldAtPoint(engine_->form(), GetPage(), new_x, new_y);
666 
667   // If there is a control and link at the same point, figure out their z-order
668   // to determine which is on top.
669   if (link && control > FPDF_FORMFIELD_UNKNOWN) {
670     int control_z_order = FPDFPage_FormFieldZOrderAtPoint(
671         engine_->form(), GetPage(), new_x, new_y);
672     int link_z_order = FPDFLink_GetLinkZOrderAtPoint(GetPage(), new_x, new_y);
673     DCHECK_NE(control_z_order, link_z_order);
674     if (control_z_order > link_z_order) {
675       *form_type = control;
676       return FormTypeToArea(*form_type);
677     }
678 
679     // We don't handle all possible link types of the PDF. For example,
680     // launch actions, cross-document links, etc.
681     // In that case, GetLinkTarget() will return NONSELECTABLE_AREA
682     // and we should proceed with area detection.
683     Area area = GetLinkTarget(link, target);
684     if (area != NONSELECTABLE_AREA)
685       return area;
686   } else if (link) {
687     // We don't handle all possible link types of the PDF. For example,
688     // launch actions, cross-document links, etc.
689     // See identical block above.
690     Area area = GetLinkTarget(link, target);
691     if (area != NONSELECTABLE_AREA)
692       return area;
693   } else if (control > FPDF_FORMFIELD_UNKNOWN) {
694     *form_type = control;
695     return FormTypeToArea(*form_type);
696   }
697 
698   if (rv < 0)
699     return NONSELECTABLE_AREA;
700 
701   return GetLink(*char_index, target) != -1 ? WEBLINK_AREA : TEXT_AREA;
702 }
703 
704 // static
FormTypeToArea(int form_type)705 PDFiumPage::Area PDFiumPage::FormTypeToArea(int form_type) {
706   switch (form_type) {
707     case FPDF_FORMFIELD_COMBOBOX:
708     case FPDF_FORMFIELD_TEXTFIELD:
709 #if defined(PDF_ENABLE_XFA)
710     // TODO(bug_353450): figure out selection and copying for XFA fields.
711     case FPDF_FORMFIELD_XFA_COMBOBOX:
712     case FPDF_FORMFIELD_XFA_TEXTFIELD:
713 #endif
714       return FORM_TEXT_AREA;
715     default:
716       return NONSELECTABLE_AREA;
717   }
718 }
719 
GetCharAtIndex(int index)720 base::char16 PDFiumPage::GetCharAtIndex(int index) {
721   if (!available_)
722     return L'\0';
723   return static_cast<base::char16>(FPDFText_GetUnicode(GetTextPage(), index));
724 }
725 
GetCharCount()726 int PDFiumPage::GetCharCount() {
727   if (!available_)
728     return 0;
729   return FPDFText_CountChars(GetTextPage());
730 }
731 
IsCharIndexInBounds(int index)732 bool PDFiumPage::IsCharIndexInBounds(int index) {
733   return index >= 0 && index < GetCharCount();
734 }
735 
GetLinkTarget(FPDF_LINK link,LinkTarget * target)736 PDFiumPage::Area PDFiumPage::GetLinkTarget(FPDF_LINK link, LinkTarget* target) {
737   FPDF_DEST dest_link = FPDFLink_GetDest(engine_->doc(), link);
738   if (dest_link)
739     return GetDestinationTarget(dest_link, target);
740 
741   FPDF_ACTION action = FPDFLink_GetAction(link);
742   if (!action)
743     return NONSELECTABLE_AREA;
744 
745   switch (FPDFAction_GetType(action)) {
746     case PDFACTION_GOTO: {
747       FPDF_DEST dest_action = FPDFAction_GetDest(engine_->doc(), action);
748       if (dest_action)
749         return GetDestinationTarget(dest_action, target);
750       // TODO(crbug.com/55776): We don't fully support all types of the
751       // in-document links.
752       return NONSELECTABLE_AREA;
753     }
754     case PDFACTION_URI:
755       return GetURITarget(action, target);
756     // TODO(crbug.com/767191): Support PDFACTION_LAUNCH.
757     // TODO(crbug.com/142344): Support PDFACTION_REMOTEGOTO.
758     case PDFACTION_LAUNCH:
759     case PDFACTION_REMOTEGOTO:
760     default:
761       return NONSELECTABLE_AREA;
762   }
763 }
764 
GetDestinationTarget(FPDF_DEST destination,LinkTarget * target)765 PDFiumPage::Area PDFiumPage::GetDestinationTarget(FPDF_DEST destination,
766                                                   LinkTarget* target) {
767   if (!target)
768     return NONSELECTABLE_AREA;
769 
770   int page_index = FPDFDest_GetDestPageIndex(engine_->doc(), destination);
771   if (page_index < 0)
772     return NONSELECTABLE_AREA;
773 
774   target->page = page_index;
775 
776   base::Optional<gfx::PointF> xy;
777   GetPageDestinationTarget(destination, &xy, &target->zoom);
778   if (xy) {
779     gfx::PointF point = TransformPageToScreenXY(xy.value());
780     target->x_in_pixels = point.x();
781     target->y_in_pixels = point.y();
782   }
783 
784   return DOCLINK_AREA;
785 }
786 
GetPageDestinationTarget(FPDF_DEST destination,base::Optional<gfx::PointF> * xy,base::Optional<float> * zoom_value)787 void PDFiumPage::GetPageDestinationTarget(FPDF_DEST destination,
788                                           base::Optional<gfx::PointF>* xy,
789                                           base::Optional<float>* zoom_value) {
790   *xy = base::nullopt;
791   *zoom_value = base::nullopt;
792   if (!available_)
793     return;
794 
795   FPDF_BOOL has_x_coord;
796   FPDF_BOOL has_y_coord;
797   FPDF_BOOL has_zoom;
798   FS_FLOAT x;
799   FS_FLOAT y;
800   FS_FLOAT zoom;
801   FPDF_BOOL success = FPDFDest_GetLocationInPage(
802       destination, &has_x_coord, &has_y_coord, &has_zoom, &x, &y, &zoom);
803 
804   if (!success)
805     return;
806 
807   if (has_x_coord && has_y_coord)
808     *xy = gfx::PointF(x, y);
809 
810   if (has_zoom)
811     *zoom_value = zoom;
812 }
813 
// Converts the page-space point |xy| to pixels by running a zero-sized rect
// anchored at that point through FloatPageRectToPixelRect(). Returns a
// default-constructed point when the page is not available.
gfx::PointF PDFiumPage::TransformPageToScreenXY(const gfx::PointF& xy) {
  if (available_) {
    pp::FloatRect point_as_page_rect(xy.x(), xy.y(), 0, 0);
    pp::FloatRect point_as_pixel_rect(
        FloatPageRectToPixelRect(GetPage(), point_as_page_rect));
    return gfx::PointF(point_as_pixel_rect.x(), point_as_pixel_rect.y());
  }
  return gfx::PointF();
}
822 
GetURITarget(FPDF_ACTION uri_action,LinkTarget * target) const823 PDFiumPage::Area PDFiumPage::GetURITarget(FPDF_ACTION uri_action,
824                                           LinkTarget* target) const {
825   if (target) {
826     size_t buffer_size =
827         FPDFAction_GetURIPath(engine_->doc(), uri_action, nullptr, 0);
828     if (buffer_size > 0) {
829       PDFiumAPIStringBufferAdapter<std::string> api_string_adapter(
830           &target->url, buffer_size, true);
831       void* data = api_string_adapter.GetData();
832       size_t bytes_written =
833           FPDFAction_GetURIPath(engine_->doc(), uri_action, data, buffer_size);
834       api_string_adapter.Close(bytes_written);
835     }
836   }
837   return WEBLINK_AREA;
838 }
839 
GetLink(int char_index,LinkTarget * target)840 int PDFiumPage::GetLink(int char_index, LinkTarget* target) {
841   if (!available_)
842     return -1;
843 
844   CalculateLinks();
845 
846   // Get the bounding box of the rect again, since it might have moved because
847   // of the tolerance above.
848   double left;
849   double right;
850   double bottom;
851   double top;
852   if (!FPDFText_GetCharBox(GetTextPage(), char_index, &left, &right, &bottom,
853                            &top)) {
854     return -1;
855   }
856 
857   pp::Point origin(PageToScreen(pp::Point(), 1.0, left, top, right, bottom,
858                                 PageOrientation::kOriginal)
859                        .point());
860   for (size_t i = 0; i < links_.size(); ++i) {
861     for (const auto& rect : links_[i].bounding_rects) {
862       if (rect.Contains(origin)) {
863         if (target)
864           target->url = links_[i].target.url;
865         return i;
866       }
867     }
868   }
869   return -1;
870 }
871 
CalculateLinks()872 void PDFiumPage::CalculateLinks() {
873   if (calculated_links_)
874     return;
875 
876   calculated_links_ = true;
877   PopulateWebLinks();
878   PopulateAnnotationLinks();
879 }
880 
PopulateWebLinks()881 void PDFiumPage::PopulateWebLinks() {
882   ScopedFPDFPageLink links(FPDFLink_LoadWebLinks(GetTextPage()));
883   int count = FPDFLink_CountWebLinks(links.get());
884   for (int i = 0; i < count; ++i) {
885     base::string16 url;
886     int url_length = FPDFLink_GetURL(links.get(), i, nullptr, 0);
887     if (url_length > 0) {
888       PDFiumAPIStringBufferAdapter<base::string16> api_string_adapter(
889           &url, url_length, true);
890       unsigned short* data =
891           reinterpret_cast<unsigned short*>(api_string_adapter.GetData());
892       int actual_length = FPDFLink_GetURL(links.get(), i, data, url_length);
893       api_string_adapter.Close(actual_length);
894     }
895     Link link;
896     link.target.url = base::UTF16ToUTF8(url);
897 
898     IsValidLinkFunction is_valid_link_func =
899         g_is_valid_link_func_for_testing ? g_is_valid_link_func_for_testing
900                                          : &IsValidLink;
901     if (!is_valid_link_func(link.target.url))
902       continue;
903 
904     // Make sure all the characters in the URL are valid per RFC 1738.
905     // http://crbug.com/340326 has a sample bad PDF.
906     // GURL does not work correctly, e.g. it just strips \t \r \n.
907     bool is_invalid_url = false;
908     for (size_t j = 0; j < link.target.url.length(); ++j) {
909       // Control characters are not allowed.
910       // 0x7F is also a control character.
911       // 0x80 and above are not in US-ASCII.
912       if (link.target.url[j] < ' ' || link.target.url[j] >= '\x7F') {
913         is_invalid_url = true;
914         break;
915       }
916     }
917     if (is_invalid_url)
918       continue;
919 
920     int rect_count = FPDFLink_CountRects(links.get(), i);
921     for (int j = 0; j < rect_count; ++j) {
922       double left;
923       double top;
924       double right;
925       double bottom;
926       FPDFLink_GetRect(links.get(), i, j, &left, &top, &right, &bottom);
927       pp::Rect rect = PageToScreen(pp::Point(), 1.0, left, top, right, bottom,
928                                    PageOrientation::kOriginal);
929       if (rect.IsEmpty())
930         continue;
931       link.bounding_rects.push_back(rect);
932     }
933     FPDF_BOOL is_link_over_text = FPDFLink_GetTextRange(
934         links.get(), i, &link.start_char_index, &link.char_count);
935     DCHECK(is_link_over_text);
936     links_.push_back(link);
937   }
938 }
939 
PopulateAnnotationLinks()940 void PDFiumPage::PopulateAnnotationLinks() {
941   int start_pos = 0;
942   FPDF_LINK link_annot;
943   FPDF_PAGE page = GetPage();
944   while (FPDFLink_Enumerate(page, &start_pos, &link_annot)) {
945     Link link;
946     Area area = GetLinkTarget(link_annot, &link.target);
947     if (area == NONSELECTABLE_AREA)
948       continue;
949 
950     FS_RECTF link_rect;
951     if (!FPDFLink_GetAnnotRect(link_annot, &link_rect))
952       continue;
953 
954     // The horizontal/vertical coordinates in PDF Links could be
955     // flipped. Swap the coordinates before further processing.
956     if (link_rect.right < link_rect.left)
957       std::swap(link_rect.right, link_rect.left);
958     if (link_rect.bottom > link_rect.top)
959       std::swap(link_rect.bottom, link_rect.top);
960 
961     int quad_point_count = FPDFLink_CountQuadPoints(link_annot);
962     // Calculate the bounds of link using the quad points data.
963     // If quad points for link is not present then use
964     // |link_rect| to calculate the bounds instead.
965     if (quad_point_count > 0) {
966       for (int i = 0; i < quad_point_count; ++i) {
967         FS_QUADPOINTSF point;
968         if (FPDFLink_GetQuadPoints(link_annot, i, &point)) {
969           // PDF Specifications: Quadpoints start from bottom left (x1, y1) and
970           // runs counter clockwise.
971           link.bounding_rects.push_back(
972               PageToScreen(pp::Point(), 1.0, point.x4, point.y4, point.x2,
973                            point.y2, PageOrientation::kOriginal));
974         }
975       }
976     } else {
977       link.bounding_rects.push_back(PageToScreen(
978           pp::Point(), 1.0, link_rect.left, link_rect.top, link_rect.right,
979           link_rect.bottom, PageOrientation::kOriginal));
980     }
981 
982     // Calculate underlying text range of link.
983     GetUnderlyingTextRangeForRect(
984         pp::FloatRect(link_rect.left, link_rect.bottom,
985                       std::abs(link_rect.right - link_rect.left),
986                       std::abs(link_rect.bottom - link_rect.top)),
987         &link.start_char_index, &link.char_count);
988     links_.emplace_back(link);
989   }
990 }
991 
CalculateImages()992 void PDFiumPage::CalculateImages() {
993   if (calculated_images_)
994     return;
995 
996   calculated_images_ = true;
997   FPDF_PAGE page = GetPage();
998   int page_object_count = FPDFPage_CountObjects(page);
999   MarkedContentIdToImageMap marked_content_id_image_map;
1000   bool is_tagged = FPDFCatalog_IsTagged(engine_->doc());
1001   for (int i = 0; i < page_object_count; ++i) {
1002     FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i);
1003     if (FPDFPageObj_GetType(page_object) != FPDF_PAGEOBJ_IMAGE)
1004       continue;
1005     float left;
1006     float top;
1007     float right;
1008     float bottom;
1009     FPDF_BOOL ret =
1010         FPDFPageObj_GetBounds(page_object, &left, &bottom, &right, &top);
1011     DCHECK(ret);
1012     Image image;
1013     image.bounding_rect = PageToScreen(pp::Point(), 1.0, left, top, right,
1014                                        bottom, PageOrientation::kOriginal);
1015 
1016     if (is_tagged) {
1017       // Collect all marked content IDs for image objects so that they can
1018       // later be used to retrieve alt text from struct tree for the page.
1019       FPDF_IMAGEOBJ_METADATA image_metadata;
1020       if (FPDFImageObj_GetImageMetadata(page_object, page, &image_metadata)) {
1021         int marked_content_id = image_metadata.marked_content_id;
1022         if (marked_content_id >= 0) {
1023           // If |marked_content_id| is already present, ignore the one being
1024           // inserted.
1025           marked_content_id_image_map.insert(
1026               {marked_content_id, images_.size()});
1027         }
1028       }
1029     }
1030     images_.push_back(image);
1031   }
1032 
1033   if (!marked_content_id_image_map.empty())
1034     PopulateImageAltText(marked_content_id_image_map);
1035 }
1036 
PopulateImageAltText(const MarkedContentIdToImageMap & marked_content_id_image_map)1037 void PDFiumPage::PopulateImageAltText(
1038     const MarkedContentIdToImageMap& marked_content_id_image_map) {
1039   ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(GetPage()));
1040   if (!struct_tree)
1041     return;
1042 
1043   std::set<FPDF_STRUCTELEMENT> visited_elements;
1044   int tree_children_count = FPDF_StructTree_CountChildren(struct_tree.get());
1045   for (int i = 0; i < tree_children_count; ++i) {
1046     FPDF_STRUCTELEMENT current_element =
1047         FPDF_StructTree_GetChildAtIndex(struct_tree.get(), i);
1048     PopulateImageAltTextForStructElement(marked_content_id_image_map,
1049                                          current_element, &visited_elements);
1050   }
1051 }
1052 
PopulateImageAltTextForStructElement(const MarkedContentIdToImageMap & marked_content_id_image_map,FPDF_STRUCTELEMENT current_element,std::set<FPDF_STRUCTELEMENT> * visited_elements)1053 void PDFiumPage::PopulateImageAltTextForStructElement(
1054     const MarkedContentIdToImageMap& marked_content_id_image_map,
1055     FPDF_STRUCTELEMENT current_element,
1056     std::set<FPDF_STRUCTELEMENT>* visited_elements) {
1057   if (!current_element)
1058     return;
1059 
1060   bool inserted = visited_elements->insert(current_element).second;
1061   if (!inserted)
1062     return;
1063 
1064   int marked_content_id =
1065       FPDF_StructElement_GetMarkedContentID(current_element);
1066   if (marked_content_id >= 0) {
1067     auto it = marked_content_id_image_map.find(marked_content_id);
1068     if (it != marked_content_id_image_map.end() &&
1069         images_[it->second].alt_text.empty()) {
1070       size_t buffer_size =
1071           FPDF_StructElement_GetAltText(current_element, nullptr, 0);
1072       if (buffer_size > 0) {
1073         base::string16 alt_text;
1074         PDFiumAPIStringBufferSizeInBytesAdapter<base::string16>
1075             api_string_adapter(&alt_text, buffer_size, true);
1076         api_string_adapter.Close(FPDF_StructElement_GetAltText(
1077             current_element, api_string_adapter.GetData(), buffer_size));
1078         images_[it->second].alt_text = base::UTF16ToUTF8(alt_text);
1079       }
1080     }
1081   }
1082   int children_count = FPDF_StructElement_CountChildren(current_element);
1083   for (int i = 0; i < children_count; ++i) {
1084     FPDF_STRUCTELEMENT child =
1085         FPDF_StructElement_GetChildAtIndex(current_element, i);
1086     PopulateImageAltTextForStructElement(marked_content_id_image_map, child,
1087                                          visited_elements);
1088   }
1089 }
1090 
PopulateAnnotations()1091 void PDFiumPage::PopulateAnnotations() {
1092   if (calculated_annotations_)
1093     return;
1094 
1095   FPDF_PAGE page = GetPage();
1096   if (!page)
1097     return;
1098 
1099   int annotation_count = FPDFPage_GetAnnotCount(page);
1100   for (int i = 0; i < annotation_count; ++i) {
1101     ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
1102     DCHECK(annot);
1103     FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
1104 
1105     switch (subtype) {
1106       case FPDF_ANNOT_HIGHLIGHT: {
1107         PopulateHighlight(annot.get());
1108         break;
1109       }
1110       case FPDF_ANNOT_WIDGET: {
1111         // TODO(crbug.com/1030242): Populate other types of form fields too.
1112         if (FPDFAnnot_GetFormFieldType(engine_->form(), annot.get()) ==
1113             FPDF_FORMFIELD_TEXTFIELD) {
1114           PopulateTextField(annot.get());
1115         }
1116         break;
1117       }
1118       default:
1119         break;
1120     }
1121   }
1122   calculated_annotations_ = true;
1123 }
1124 
PopulateHighlight(FPDF_ANNOTATION annot)1125 void PDFiumPage::PopulateHighlight(FPDF_ANNOTATION annot) {
1126   DCHECK(annot);
1127   DCHECK_EQ(FPDFAnnot_GetSubtype(annot), FPDF_ANNOT_HIGHLIGHT);
1128 
1129   FS_RECTF rect;
1130   if (!FPDFAnnot_GetRect(annot, &rect))
1131     return;
1132 
1133   Highlight highlight;
1134   // We use the bounding box of the highlight as the bounding rect.
1135   highlight.bounding_rect =
1136       PageToScreen(pp::Point(), 1.0, rect.left, rect.top, rect.right,
1137                    rect.bottom, PageOrientation::kOriginal);
1138   GetUnderlyingTextRangeForRect(
1139       pp::FloatRect(rect.left, rect.bottom, std::abs(rect.right - rect.left),
1140                     std::abs(rect.bottom - rect.top)),
1141       &highlight.start_char_index, &highlight.char_count);
1142 
1143   // Retrieve the color of the highlight.
1144   unsigned int color_r;
1145   unsigned int color_g;
1146   unsigned int color_b;
1147   unsigned int color_a;
1148   FPDF_PAGEOBJECT page_object = FPDFAnnot_GetObject(annot, 0);
1149   if (FPDFPageObj_GetFillColor(page_object, &color_r, &color_g, &color_b,
1150                                &color_a)) {
1151     highlight.color = MakeARGB(color_a, color_r, color_g, color_b);
1152   } else {
1153     // Set the same default color as in pdfium. See calls to
1154     // GetColorStringWithDefault() in CPVT_GenerateAP::Generate*AP() in
1155     // pdfium.
1156     highlight.color = MakeARGB(255, 255, 255, 0);
1157   }
1158 
1159   highlights_.push_back(std::move(highlight));
1160 }
1161 
PopulateTextField(FPDF_ANNOTATION annot)1162 void PDFiumPage::PopulateTextField(FPDF_ANNOTATION annot) {
1163   DCHECK(annot);
1164   FPDF_FORMHANDLE form_handle = engine_->form();
1165   DCHECK_EQ(FPDFAnnot_GetFormFieldType(form_handle, annot),
1166             FPDF_FORMFIELD_TEXTFIELD);
1167 
1168   FS_RECTF rect;
1169   if (!FPDFAnnot_GetRect(annot, &rect))
1170     return;
1171 
1172   TextField text_field;
1173   // We use the bounding box of the text field as the bounding rect.
1174   text_field.bounding_rect =
1175       PageToScreen(pp::Point(), 1.0, rect.left, rect.top, rect.right,
1176                    rect.bottom, PageOrientation::kOriginal);
1177   text_field.value = GetFormFieldProperty(
1178       base::BindRepeating(FPDFAnnot_GetFormFieldValue, form_handle, annot));
1179   text_field.name = GetFormFieldProperty(
1180       base::BindRepeating(FPDFAnnot_GetFormFieldName, form_handle, annot));
1181   text_field.flags = FPDFAnnot_GetFormFieldFlags(form_handle, annot);
1182   text_fields_.push_back(std::move(text_field));
1183 }
1184 
GetUnderlyingTextRangeForRect(const pp::FloatRect & rect,int * start_index,int * char_len)1185 bool PDFiumPage::GetUnderlyingTextRangeForRect(const pp::FloatRect& rect,
1186                                                int* start_index,
1187                                                int* char_len) {
1188   if (!available_)
1189     return false;
1190 
1191   FPDF_TEXTPAGE text_page = GetTextPage();
1192   int char_count = FPDFText_CountChars(text_page);
1193   if (char_count <= 0)
1194     return false;
1195 
1196   int start_char_index = -1;
1197   int cur_char_count = 0;
1198 
1199   // Iterate over page text to find such continuous characters whose mid-points
1200   // lie inside the rectangle.
1201   for (int i = 0; i < char_count; ++i) {
1202     double char_left;
1203     double char_right;
1204     double char_bottom;
1205     double char_top;
1206     if (!FPDFText_GetCharBox(text_page, i, &char_left, &char_right,
1207                              &char_bottom, &char_top)) {
1208       break;
1209     }
1210 
1211     float xmid = (char_left + char_right) / 2;
1212     float ymid = (char_top + char_bottom) / 2;
1213     if (rect.Contains(xmid, ymid)) {
1214       if (start_char_index == -1)
1215         start_char_index = i;
1216       ++cur_char_count;
1217     } else if (start_char_index != -1) {
1218       break;
1219     }
1220   }
1221 
1222   if (cur_char_count == 0)
1223     return false;
1224 
1225   *char_len = cur_char_count;
1226   *start_index = start_char_index;
1227   return true;
1228 }
1229 
// Converts a rectangle given by two page-space corners, (|left|, |top|) and
// (|right|, |bottom|), into a device-space pp::Rect. The device area handed
// to PDFium is this page's |rect_| shifted by |offset| and scaled by |zoom|.
// Returns an empty pp::Rect when the page is unavailable, when the scaled
// placement does not fit in an int, or when the resulting size overflows.
pp::Rect PDFiumPage::PageToScreen(const pp::Point& offset,
                                  double zoom,
                                  double left,
                                  double top,
                                  double right,
                                  double bottom,
                                  PageOrientation orientation) const {
  if (!available_)
    return pp::Rect();

  const double start_x = (rect_.x() - offset.x()) * zoom;
  const double start_y = (rect_.y() - offset.y()) * zoom;
  const double size_x = rect_.width() * zoom;
  const double size_y = rect_.height() * zoom;
  const bool placement_fits_int =
      base::IsValueInRangeForNumericType<int>(start_x) &&
      base::IsValueInRangeForNumericType<int>(start_y) &&
      base::IsValueInRangeForNumericType<int>(size_x) &&
      base::IsValueInRangeForNumericType<int>(size_y);
  if (!placement_fits_int)
    return pp::Rect();

  const int device_x = static_cast<int>(start_x);
  const int device_y = static_cast<int>(start_y);
  const int device_width = static_cast<int>(ceil(size_x));
  const int device_height = static_cast<int>(ceil(size_y));
  const int rotation = ToPDFiumRotation(orientation);

  // Map each of the two corners separately.
  int first_x;
  int first_y;
  FPDF_BOOL ret =
      FPDF_PageToDevice(page(), device_x, device_y, device_width,
                        device_height, rotation, left, top, &first_x, &first_y);
  DCHECK(ret);
  int second_x;
  int second_y;
  ret = FPDF_PageToDevice(page(), device_x, device_y, device_width,
                          device_height, rotation, right, bottom, &second_x,
                          &second_y);
  DCHECK(ret);

  // If the PDF is rotated, the horizontal/vertical coordinates could be
  // flipped, so order the corners before computing the size.  See
  // http://www.netl.doe.gov/publications/proceedings/03/ubc/presentations/Goeckner-pres.pdf
  const int min_x = std::min(first_x, second_x);
  const int max_x = std::max(first_x, second_x);
  const int min_y = std::min(first_y, second_y);
  const int max_y = std::max(first_y, second_y);

  // Size is (max - min + 1), computed with overflow checking.
  base::CheckedNumeric<int32_t> checked_width = max_x;
  checked_width -= min_x;
  checked_width += 1;
  base::CheckedNumeric<int32_t> checked_height = max_y;
  checked_height -= min_y;
  checked_height += 1;
  if (checked_width.IsValid() && checked_height.IsValid()) {
    return pp::Rect(min_x, min_y, checked_width.ValueOrDie(),
                    checked_height.ValueOrDie());
  }
  return pp::Rect();
}
1286 
ScopedUnloadPreventer(PDFiumPage * page)1287 PDFiumPage::ScopedUnloadPreventer::ScopedUnloadPreventer(PDFiumPage* page)
1288     : page_(page) {
1289   page_->preventing_unload_count_++;
1290 }
1291 
~ScopedUnloadPreventer()1292 PDFiumPage::ScopedUnloadPreventer::~ScopedUnloadPreventer() {
1293   page_->preventing_unload_count_--;
1294 }
1295 
1296 PDFiumPage::Link::Link() = default;
1297 
1298 PDFiumPage::Link::Link(const Link& that) = default;
1299 
1300 PDFiumPage::Link::~Link() = default;
1301 
1302 PDFiumPage::Image::Image() = default;
1303 
1304 PDFiumPage::Image::Image(const Image& that) = default;
1305 
1306 PDFiumPage::Image::~Image() = default;
1307 
1308 PDFiumPage::Highlight::Highlight() = default;
1309 
1310 PDFiumPage::Highlight::Highlight(const Highlight& that) = default;
1311 
1312 PDFiumPage::Highlight::~Highlight() = default;
1313 
1314 PDFiumPage::TextField::TextField() = default;
1315 
1316 PDFiumPage::TextField::TextField(const TextField& that) = default;
1317 
1318 PDFiumPage::TextField::~TextField() = default;
1319 
ToPDFiumRotation(PageOrientation orientation)1320 int ToPDFiumRotation(PageOrientation orientation) {
1321   // Could static_cast<int>(orientation), but using an exhaustive switch will
1322   // trigger an error if we ever change the definition of PageOrientation.
1323   switch (orientation) {
1324     case PageOrientation::kOriginal:
1325       return 0;
1326     case PageOrientation::kClockwise90:
1327       return 1;
1328     case PageOrientation::kClockwise180:
1329       return 2;
1330     case PageOrientation::kClockwise270:
1331       return 3;
1332   }
1333   NOTREACHED();
1334   return 0;
1335 }
1336 
1337 }  // namespace chrome_pdf
1338