1 // Copyright 2018 The PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "samples/pdfium_test_write_helper.h"
6 
7 #include <limits.h>
8 
9 #include <memory>
10 #include <string>
11 #include <utility>
12 #include <vector>
13 
14 #include "public/cpp/fpdf_scopers.h"
15 #include "public/fpdf_annot.h"
16 #include "public/fpdf_attachment.h"
17 #include "public/fpdf_edit.h"
18 #include "public/fpdf_thumbnail.h"
19 #include "testing/fx_string_testhelpers.h"
20 #include "testing/image_diff/image_diff_png.h"
21 #include "third_party/base/logging.h"
22 
23 namespace {
24 
// Returns true when |stride|, |width| and |height| are all non-negative and
// the product |stride| * |height| is representable as an int.
bool CheckDimensions(int stride, int width, int height) {
  const bool all_non_negative = stride >= 0 && width >= 0 && height >= 0;
  if (!all_non_negative)
    return false;

  // With zero rows there is nothing to multiply, so any stride is acceptable.
  if (height == 0)
    return true;

  // Divide rather than multiply so the check itself cannot overflow.
  return stride <= INT_MAX / height;
}
32 
AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype)33 const char* AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype) {
34   if (subtype == FPDF_ANNOT_TEXT)
35     return "Text";
36   if (subtype == FPDF_ANNOT_LINK)
37     return "Link";
38   if (subtype == FPDF_ANNOT_FREETEXT)
39     return "FreeText";
40   if (subtype == FPDF_ANNOT_LINE)
41     return "Line";
42   if (subtype == FPDF_ANNOT_SQUARE)
43     return "Square";
44   if (subtype == FPDF_ANNOT_CIRCLE)
45     return "Circle";
46   if (subtype == FPDF_ANNOT_POLYGON)
47     return "Polygon";
48   if (subtype == FPDF_ANNOT_POLYLINE)
49     return "PolyLine";
50   if (subtype == FPDF_ANNOT_HIGHLIGHT)
51     return "Highlight";
52   if (subtype == FPDF_ANNOT_UNDERLINE)
53     return "Underline";
54   if (subtype == FPDF_ANNOT_SQUIGGLY)
55     return "Squiggly";
56   if (subtype == FPDF_ANNOT_STRIKEOUT)
57     return "StrikeOut";
58   if (subtype == FPDF_ANNOT_STAMP)
59     return "Stamp";
60   if (subtype == FPDF_ANNOT_CARET)
61     return "Caret";
62   if (subtype == FPDF_ANNOT_INK)
63     return "Ink";
64   if (subtype == FPDF_ANNOT_POPUP)
65     return "Popup";
66   if (subtype == FPDF_ANNOT_FILEATTACHMENT)
67     return "FileAttachment";
68   if (subtype == FPDF_ANNOT_SOUND)
69     return "Sound";
70   if (subtype == FPDF_ANNOT_MOVIE)
71     return "Movie";
72   if (subtype == FPDF_ANNOT_WIDGET)
73     return "Widget";
74   if (subtype == FPDF_ANNOT_SCREEN)
75     return "Screen";
76   if (subtype == FPDF_ANNOT_PRINTERMARK)
77     return "PrinterMark";
78   if (subtype == FPDF_ANNOT_TRAPNET)
79     return "TrapNet";
80   if (subtype == FPDF_ANNOT_WATERMARK)
81     return "Watermark";
82   if (subtype == FPDF_ANNOT_THREED)
83     return "3D";
84   if (subtype == FPDF_ANNOT_RICHMEDIA)
85     return "RichMedia";
86   if (subtype == FPDF_ANNOT_XFAWIDGET)
87     return "XFAWidget";
88   NOTREACHED();
89   return "";
90 }
91 
// Appends |flag| to |output|, inserting a ", " separator first when |output|
// already holds text.
void AppendFlagString(const char* flag, std::string* output) {
  const char* separator = output->empty() ? "" : ", ";
  output->append(separator).append(flag);
}
97 
AnnotFlagsToString(int flags)98 std::string AnnotFlagsToString(int flags) {
99   std::string str;
100   if (flags & FPDF_ANNOT_FLAG_INVISIBLE)
101     AppendFlagString("Invisible", &str);
102   if (flags & FPDF_ANNOT_FLAG_HIDDEN)
103     AppendFlagString("Hidden", &str);
104   if (flags & FPDF_ANNOT_FLAG_PRINT)
105     AppendFlagString("Print", &str);
106   if (flags & FPDF_ANNOT_FLAG_NOZOOM)
107     AppendFlagString("NoZoom", &str);
108   if (flags & FPDF_ANNOT_FLAG_NOROTATE)
109     AppendFlagString("NoRotate", &str);
110   if (flags & FPDF_ANNOT_FLAG_NOVIEW)
111     AppendFlagString("NoView", &str);
112   if (flags & FPDF_ANNOT_FLAG_READONLY)
113     AppendFlagString("ReadOnly", &str);
114   if (flags & FPDF_ANNOT_FLAG_LOCKED)
115     AppendFlagString("Locked", &str);
116   if (flags & FPDF_ANNOT_FLAG_TOGGLENOVIEW)
117     AppendFlagString("ToggleNoView", &str);
118   return str;
119 }
120 
PageObjectTypeToCString(int type)121 const char* PageObjectTypeToCString(int type) {
122   if (type == FPDF_PAGEOBJ_TEXT)
123     return "Text";
124   if (type == FPDF_PAGEOBJ_PATH)
125     return "Path";
126   if (type == FPDF_PAGEOBJ_IMAGE)
127     return "Image";
128   if (type == FPDF_PAGEOBJ_SHADING)
129     return "Shading";
130   if (type == FPDF_PAGEOBJ_FORM)
131     return "Form";
132   NOTREACHED();
133   return "";
134 }
135 
EncodePng(pdfium::span<const uint8_t> input,int width,int height,int stride,int format)136 std::vector<uint8_t> EncodePng(pdfium::span<const uint8_t> input,
137                                int width,
138                                int height,
139                                int stride,
140                                int format) {
141   std::vector<uint8_t> png;
142   switch (format) {
143     case FPDFBitmap_Unknown:
144       break;
145     case FPDFBitmap_Gray:
146       png = image_diff_png::EncodeGrayPNG(input, width, height, stride);
147       break;
148     case FPDFBitmap_BGR:
149       png = image_diff_png::EncodeBGRPNG(input, width, height, stride);
150       break;
151     case FPDFBitmap_BGRx:
152       png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
153                                           /*discard_transparency=*/true);
154       break;
155     case FPDFBitmap_BGRA:
156       png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
157                                           /*discard_transparency=*/false);
158       break;
159     default:
160       NOTREACHED();
161   }
162   return png;
163 }
164 
165 #ifdef _WIN32
EnhMetaFileProc(HDC hdc,HANDLETABLE * handle_table,const ENHMETARECORD * record,int objects_count,LPARAM param)166 int CALLBACK EnhMetaFileProc(HDC hdc,
167                              HANDLETABLE* handle_table,
168                              const ENHMETARECORD* record,
169                              int objects_count,
170                              LPARAM param) {
171   std::vector<const ENHMETARECORD*>& items =
172       *reinterpret_cast<std::vector<const ENHMETARECORD*>*>(param);
173   items.push_back(record);
174   return 1;
175 }
176 #endif  // _WIN32
177 
178 }  // namespace
179 
WritePpm(const char * pdf_name,int num,void * buffer_void,int stride,int width,int height)180 std::string WritePpm(const char* pdf_name,
181                      int num,
182                      void* buffer_void,
183                      int stride,
184                      int width,
185                      int height) {
186   if (!CheckDimensions(stride, width, height))
187     return "";
188 
189   int out_len = width * height;
190   if (out_len > INT_MAX / 3)
191     return "";
192 
193   out_len *= 3;
194 
195   char filename[256];
196   snprintf(filename, sizeof(filename), "%s.%d.ppm", pdf_name, num);
197   FILE* fp = fopen(filename, "wb");
198   if (!fp)
199     return "";
200 
201   fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height);
202   // Source data is B, G, R, unused.
203   // Dest data is R, G, B.
204   const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buffer_void);
205   std::vector<uint8_t> result(out_len);
206   for (int h = 0; h < height; ++h) {
207     const uint8_t* src_line = buffer + (stride * h);
208     uint8_t* dest_line = result.data() + (width * h * 3);
209     for (int w = 0; w < width; ++w) {
210       // R
211       dest_line[w * 3] = src_line[(w * 4) + 2];
212       // G
213       dest_line[(w * 3) + 1] = src_line[(w * 4) + 1];
214       // B
215       dest_line[(w * 3) + 2] = src_line[w * 4];
216     }
217   }
218   if (fwrite(result.data(), out_len, 1, fp) != 1)
219     fprintf(stderr, "Failed to write to %s\n", filename);
220 
221   fclose(fp);
222   return std::string(filename);
223 }
224 
WriteText(FPDF_PAGE page,const char * pdf_name,int num)225 void WriteText(FPDF_PAGE page, const char* pdf_name, int num) {
226   char filename[256];
227   int chars_formatted =
228       snprintf(filename, sizeof(filename), "%s.%d.txt", pdf_name, num);
229   if (chars_formatted < 0 ||
230       static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
231     fprintf(stderr, "Filename %s is too long\n", filename);
232     return;
233   }
234 
235   FILE* fp = fopen(filename, "w");
236   if (!fp) {
237     fprintf(stderr, "Failed to open %s for output\n", filename);
238     return;
239   }
240 
241   // Output in UTF32-LE.
242   uint32_t bom = 0x0000FEFF;
243   if (fwrite(&bom, sizeof(bom), 1, fp) != 1) {
244     fprintf(stderr, "Failed to write to %s\n", filename);
245     (void)fclose(fp);
246     return;
247   }
248 
249   ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
250   for (int i = 0; i < FPDFText_CountChars(textpage.get()); i++) {
251     uint32_t c = FPDFText_GetUnicode(textpage.get(), i);
252     if (fwrite(&c, sizeof(c), 1, fp) != 1) {
253       fprintf(stderr, "Failed to write to %s\n", filename);
254       break;
255     }
256   }
257   (void)fclose(fp);
258 }
259 
WriteAnnot(FPDF_PAGE page,const char * pdf_name,int num)260 void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) {
261   // Open the output text file.
262   char filename[256];
263   int chars_formatted =
264       snprintf(filename, sizeof(filename), "%s.%d.annot.txt", pdf_name, num);
265   if (chars_formatted < 0 ||
266       static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
267     fprintf(stderr, "Filename %s is too long\n", filename);
268     return;
269   }
270 
271   FILE* fp = fopen(filename, "w");
272   if (!fp) {
273     fprintf(stderr, "Failed to open %s for output\n", filename);
274     return;
275   }
276 
277   int annot_count = FPDFPage_GetAnnotCount(page);
278   fprintf(fp, "Number of annotations: %d\n\n", annot_count);
279 
280   // Iterate through all annotations on this page.
281   for (int i = 0; i < annot_count; ++i) {
282     // Retrieve the annotation object and its subtype.
283     fprintf(fp, "Annotation #%d:\n", i + 1);
284     ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
285     if (!annot) {
286       fprintf(fp, "Failed to retrieve annotation!\n\n");
287       continue;
288     }
289 
290     FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
291     fprintf(fp, "Subtype: %s\n", AnnotSubtypeToCString(subtype));
292 
293     // Retrieve the annotation flags.
294     fprintf(fp, "Flags set: %s\n",
295             AnnotFlagsToString(FPDFAnnot_GetFlags(annot.get())).c_str());
296 
297     // Retrieve the annotation's object count and object types.
298     const int obj_count = FPDFAnnot_GetObjectCount(annot.get());
299     fprintf(fp, "Number of objects: %d\n", obj_count);
300     if (obj_count > 0) {
301       fprintf(fp, "Object types: ");
302       for (int j = 0; j < obj_count; ++j) {
303         const char* type = PageObjectTypeToCString(
304             FPDFPageObj_GetType(FPDFAnnot_GetObject(annot.get(), j)));
305         fprintf(fp, "%s  ", type);
306       }
307       fprintf(fp, "\n");
308     }
309 
310     // Retrieve the annotation's color and interior color.
311     unsigned int R;
312     unsigned int G;
313     unsigned int B;
314     unsigned int A;
315     if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_Color, &R, &G, &B,
316                            &A)) {
317       fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A);
318     } else {
319       fprintf(fp, "Failed to retrieve color.\n");
320     }
321     if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_InteriorColor, &R,
322                            &G, &B, &A)) {
323       fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A);
324     } else {
325       fprintf(fp, "Failed to retrieve interior color.\n");
326     }
327 
328     // Retrieve the annotation's contents and author.
329     static constexpr char kContentsKey[] = "Contents";
330     static constexpr char kAuthorKey[] = "T";
331     unsigned long length_bytes =
332         FPDFAnnot_GetStringValue(annot.get(), kContentsKey, nullptr, 0);
333     std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
334     FPDFAnnot_GetStringValue(annot.get(), kContentsKey, buf.data(),
335                              length_bytes);
336     fprintf(fp, "Content: %ls\n", GetPlatformWString(buf.data()).c_str());
337     length_bytes =
338         FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, nullptr, 0);
339     buf = GetFPDFWideStringBuffer(length_bytes);
340     FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, buf.data(), length_bytes);
341     fprintf(fp, "Author: %ls\n", GetPlatformWString(buf.data()).c_str());
342 
343     // Retrieve the annotation's quadpoints if it is a markup annotation.
344     if (FPDFAnnot_HasAttachmentPoints(annot.get())) {
345       size_t qp_count = FPDFAnnot_CountAttachmentPoints(annot.get());
346       fprintf(fp, "Number of quadpoints sets: %zu\n", qp_count);
347 
348       // Iterate through all quadpoints of the current annotation
349       for (size_t j = 0; j < qp_count; ++j) {
350         FS_QUADPOINTSF quadpoints;
351         if (FPDFAnnot_GetAttachmentPoints(annot.get(), j, &quadpoints)) {
352           fprintf(fp,
353                   "Quadpoints set #%zu: (%.3f, %.3f), (%.3f, %.3f), "
354                   "(%.3f, %.3f), (%.3f, %.3f)\n",
355                   j + 1, quadpoints.x1, quadpoints.y1, quadpoints.x2,
356                   quadpoints.y2, quadpoints.x3, quadpoints.y3, quadpoints.x4,
357                   quadpoints.y4);
358         } else {
359           fprintf(fp, "Failed to retrieve quadpoints set #%zu.\n", j + 1);
360         }
361       }
362     }
363 
364     // Retrieve the annotation's rectangle coordinates.
365     FS_RECTF rect;
366     if (FPDFAnnot_GetRect(annot.get(), &rect)) {
367       fprintf(fp, "Rectangle: l - %.3f, b - %.3f, r - %.3f, t - %.3f\n\n",
368               rect.left, rect.bottom, rect.right, rect.top);
369     } else {
370       fprintf(fp, "Failed to retrieve annotation rectangle.\n");
371     }
372   }
373 
374   (void)fclose(fp);
375 }
376 
WritePng(const char * pdf_name,int num,void * buffer,int stride,int width,int height)377 std::string WritePng(const char* pdf_name,
378                      int num,
379                      void* buffer,
380                      int stride,
381                      int width,
382                      int height) {
383   if (!CheckDimensions(stride, width, height))
384     return "";
385 
386   auto input =
387       pdfium::make_span(static_cast<uint8_t*>(buffer), stride * height);
388   std::vector<uint8_t> png_encoding =
389       EncodePng(input, width, height, stride, FPDFBitmap_BGRA);
390   if (png_encoding.empty()) {
391     fprintf(stderr, "Failed to convert bitmap to PNG\n");
392     return "";
393   }
394 
395   char filename[256];
396   int chars_formatted =
397       snprintf(filename, sizeof(filename), "%s.%d.png", pdf_name, num);
398   if (chars_formatted < 0 ||
399       static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
400     fprintf(stderr, "Filename %s is too long\n", filename);
401     return "";
402   }
403 
404   FILE* fp = fopen(filename, "wb");
405   if (!fp) {
406     fprintf(stderr, "Failed to open %s for output\n", filename);
407     return "";
408   }
409 
410   size_t bytes_written =
411       fwrite(&png_encoding.front(), 1, png_encoding.size(), fp);
412   if (bytes_written != png_encoding.size())
413     fprintf(stderr, "Failed to write to %s\n", filename);
414 
415   (void)fclose(fp);
416   return std::string(filename);
417 }
418 
419 #ifdef _WIN32
WriteBmp(const char * pdf_name,int num,void * buffer,int stride,int width,int height)420 std::string WriteBmp(const char* pdf_name,
421                      int num,
422                      void* buffer,
423                      int stride,
424                      int width,
425                      int height) {
426   if (!CheckDimensions(stride, width, height))
427     return "";
428 
429   int out_len = stride * height;
430   if (out_len > INT_MAX / 3)
431     return "";
432 
433   char filename[256];
434   snprintf(filename, sizeof(filename), "%s.%d.bmp", pdf_name, num);
435   FILE* fp = fopen(filename, "wb");
436   if (!fp)
437     return "";
438 
439   BITMAPINFO bmi = {};
440   bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD);
441   bmi.bmiHeader.biWidth = width;
442   bmi.bmiHeader.biHeight = -height;  // top-down image
443   bmi.bmiHeader.biPlanes = 1;
444   bmi.bmiHeader.biBitCount = 32;
445   bmi.bmiHeader.biCompression = BI_RGB;
446   bmi.bmiHeader.biSizeImage = 0;
447 
448   BITMAPFILEHEADER file_header = {};
449   file_header.bfType = 0x4d42;
450   file_header.bfSize = sizeof(file_header) + bmi.bmiHeader.biSize + out_len;
451   file_header.bfOffBits = file_header.bfSize - out_len;
452 
453   if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1 ||
454       fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp) != 1 ||
455       fwrite(buffer, out_len, 1, fp) != 1) {
456     fprintf(stderr, "Failed to write to %s\n", filename);
457   }
458   fclose(fp);
459   return std::string(filename);
460 }
461 
WriteEmf(FPDF_PAGE page,const char * pdf_name,int num)462 void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) {
463   char filename[256];
464   snprintf(filename, sizeof(filename), "%s.%d.emf", pdf_name, num);
465 
466   HDC dc = CreateEnhMetaFileA(nullptr, filename, nullptr, nullptr);
467 
468   int width = static_cast<int>(FPDF_GetPageWidthF(page));
469   int height = static_cast<int>(FPDF_GetPageHeightF(page));
470   HRGN rgn = CreateRectRgn(0, 0, width, height);
471   SelectClipRgn(dc, rgn);
472   DeleteObject(rgn);
473 
474   SelectObject(dc, GetStockObject(NULL_PEN));
475   SelectObject(dc, GetStockObject(WHITE_BRUSH));
476   // If a PS_NULL pen is used, the dimensions of the rectangle are 1 pixel less.
477   Rectangle(dc, 0, 0, width + 1, height + 1);
478 
479   FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
480 
481   DeleteEnhMetaFile(CloseEnhMetaFile(dc));
482 }
483 
WritePS(FPDF_PAGE page,const char * pdf_name,int num)484 void WritePS(FPDF_PAGE page, const char* pdf_name, int num) {
485   char filename[256];
486   snprintf(filename, sizeof(filename), "%s.%d.ps", pdf_name, num);
487   FILE* fp = fopen(filename, "wb");
488   if (!fp)
489     return;
490 
491   HDC dc = CreateEnhMetaFileA(nullptr, nullptr, nullptr, nullptr);
492 
493   int width = static_cast<int>(FPDF_GetPageWidthF(page));
494   int height = static_cast<int>(FPDF_GetPageHeightF(page));
495   FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
496 
497   HENHMETAFILE emf = CloseEnhMetaFile(dc);
498   std::vector<const ENHMETARECORD*> items;
499   EnumEnhMetaFile(nullptr, emf, &EnhMetaFileProc, &items, nullptr);
500   for (const ENHMETARECORD* record : items) {
501     if (record->iType != EMR_GDICOMMENT)
502       continue;
503 
504     const auto* comment = reinterpret_cast<const EMRGDICOMMENT*>(record);
505     const char* data = reinterpret_cast<const char*>(comment->Data);
506     uint16_t size = *reinterpret_cast<const uint16_t*>(data);
507     if (fwrite(data + sizeof(uint16_t), size, 1, fp) != 1) {
508       fprintf(stderr, "Failed to write to %s\n", filename);
509       break;
510     }
511   }
512   fclose(fp);
513   DeleteEnhMetaFile(emf);
514 }
515 #endif  // _WIN32
516 
517 #ifdef PDF_ENABLE_SKIA
WriteSkp(const char * pdf_name,int num,SkPictureRecorder * recorder)518 std::string WriteSkp(const char* pdf_name,
519                      int num,
520                      SkPictureRecorder* recorder) {
521   char filename[256];
522   int chars_formatted =
523       snprintf(filename, sizeof(filename), "%s.%d.skp", pdf_name, num);
524 
525   if (chars_formatted < 0 ||
526       static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
527     fprintf(stderr, "Filename %s is too long\n", filename);
528     return "";
529   }
530 
531   sk_sp<SkPicture> picture(recorder->finishRecordingAsPicture());
532   SkFILEWStream wStream(filename);
533   picture->serialize(&wStream);
534   return std::string(filename);
535 }
536 #endif
537 
// Selects which representation of a page thumbnail is being saved; the choice
// determines the output file name pattern.
enum class ThumbnailDecodeType {
  kBitmap,         // Thumbnail rendered to a bitmap, saved as a .png file.
  kRawStream,      // Raw thumbnail stream data, saved as a .bin file.
  kDecodedStream,  // Decoded thumbnail stream data, saved as a .bin file.
};
539 
// Formats the output file name for the thumbnail of page |page_num| of
// |pdf_name| into |name_buf|, which holds |name_buf_size| bytes. The file name
// pattern depends on |decode_type|. Returns false, after reporting on stderr,
// when |decode_type| is not a known value or the name does not fit into the
// buffer.
bool GetThumbnailFilename(char* name_buf,
                          size_t name_buf_size,
                          const char* pdf_name,
                          int page_num,
                          ThumbnailDecodeType decode_type) {
  // Stays null for a value outside the enumerators, so that snprintf() is
  // never handed an uninitialized format pointer.
  const char* format = nullptr;
  switch (decode_type) {
    case ThumbnailDecodeType::kBitmap:
      format = "%s.thumbnail.%d.png";
      break;
    case ThumbnailDecodeType::kDecodedStream:
      format = "%s.thumbnail.decoded.%d.bin";
      break;
    case ThumbnailDecodeType::kRawStream:
      format = "%s.thumbnail.raw.%d.bin";
      break;
  }
  if (!format) {
    fprintf(stderr, "Unknown thumbnail decode type.\n");
    return false;
  }

  int chars_formatted =
      snprintf(name_buf, name_buf_size, format, pdf_name, page_num);
  if (chars_formatted < 0 ||
      static_cast<size_t>(chars_formatted) >= name_buf_size) {
    fprintf(stderr, "Filename %s for saving is too long.\n", name_buf);
    return false;
  }

  return true;
}
568 
// Writes |buflen| bytes starting at |buf| to the file |filename|, replacing
// any existing content. |filetype| is a short description of the data that is
// only used in the status messages. Both success and failure are reported on
// stderr.
void WriteBufferToFile(const void* buf,
                       size_t buflen,
                       const char* filename,
                       const char* filetype) {
  FILE* fp = fopen(filename, "wb");
  if (!fp) {
    // Terminate the line like every other diagnostic so that the next message
    // does not end up on the same line.
    fprintf(stderr, "Failed to open %s for saving %s.\n", filename, filetype);
    return;
  }

  size_t bytes_written = fwrite(buf, 1, buflen, fp);
  if (bytes_written == buflen)
    fprintf(stderr, "Successfully wrote %s %s.\n", filetype, filename);
  else
    fprintf(stderr, "Failed to write to %s.\n", filename);
  fclose(fp);
}
586 
EncodeBitmapToPng(ScopedFPDFBitmap bitmap)587 std::vector<uint8_t> EncodeBitmapToPng(ScopedFPDFBitmap bitmap) {
588   std::vector<uint8_t> png_encoding;
589   int format = FPDFBitmap_GetFormat(bitmap.get());
590   if (format == FPDFBitmap_Unknown)
591     return png_encoding;
592 
593   int width = FPDFBitmap_GetWidth(bitmap.get());
594   int height = FPDFBitmap_GetHeight(bitmap.get());
595   int stride = FPDFBitmap_GetStride(bitmap.get());
596   if (!CheckDimensions(stride, width, height))
597     return png_encoding;
598 
599   auto input = pdfium::make_span(
600       static_cast<const uint8_t*>(FPDFBitmap_GetBuffer(bitmap.get())),
601       stride * height);
602 
603   png_encoding = EncodePng(input, width, height, stride, format);
604   return png_encoding;
605 }
606 
WriteAttachments(FPDF_DOCUMENT doc,const std::string & name)607 void WriteAttachments(FPDF_DOCUMENT doc, const std::string& name) {
608   for (int i = 0; i < FPDFDoc_GetAttachmentCount(doc); ++i) {
609     FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc, i);
610 
611     // Retrieve the attachment file name.
612     std::string attachment_name;
613     unsigned long length_bytes = FPDFAttachment_GetName(attachment, nullptr, 0);
614     if (length_bytes) {
615       std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
616       unsigned long actual_length_bytes =
617           FPDFAttachment_GetName(attachment, buf.data(), length_bytes);
618       if (actual_length_bytes == length_bytes)
619         attachment_name = GetPlatformString(buf.data());
620     }
621     if (attachment_name.empty()) {
622       fprintf(stderr, "Attachment #%d has an empty file name.\n", i + 1);
623       continue;
624     }
625 
626     // Calculate the full attachment file name.
627     char save_name[256];
628     int chars_formatted =
629         snprintf(save_name, sizeof(save_name), "%s.attachment.%s", name.c_str(),
630                  attachment_name.c_str());
631     if (chars_formatted < 0 ||
632         static_cast<size_t>(chars_formatted) >= sizeof(save_name)) {
633       fprintf(stderr, "Filename %s is too long.\n", save_name);
634       continue;
635     }
636 
637     // Retrieve the attachment.
638     length_bytes = FPDFAttachment_GetFile(attachment, nullptr, 0);
639     std::vector<char> data_buf(length_bytes);
640     if (length_bytes) {
641       unsigned long actual_length_bytes =
642           FPDFAttachment_GetFile(attachment, data_buf.data(), length_bytes);
643       if (actual_length_bytes != length_bytes)
644         data_buf.clear();
645     }
646     if (data_buf.empty()) {
647       fprintf(stderr, "Attachment \"%s\" is empty.\n", attachment_name.c_str());
648       continue;
649     }
650 
651     // Write the attachment file.
652     WriteBufferToFile(data_buf.data(), length_bytes, save_name, "attachment");
653   }
654 }
655 
WriteImages(FPDF_PAGE page,const char * pdf_name,int page_num)656 void WriteImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
657   for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
658     FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
659     if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE)
660       continue;
661 
662     ScopedFPDFBitmap bitmap(FPDFImageObj_GetBitmap(obj));
663     if (!bitmap) {
664       fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
665               i + 1, page_num + 1);
666       continue;
667     }
668 
669     char filename[256];
670     int chars_formatted = snprintf(filename, sizeof(filename), "%s.%d.%d.png",
671                                    pdf_name, page_num, i);
672     if (chars_formatted < 0 ||
673         static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
674       fprintf(stderr, "Filename %s for saving image is too long.\n", filename);
675       continue;
676     }
677 
678     std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
679     if (png_encoding.empty()) {
680       fprintf(stderr,
681               "Failed to convert image object #%d, on page #%d to png.\n",
682               i + 1, page_num + 1);
683       continue;
684     }
685 
686     WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
687                       "image");
688   }
689 }
690 
WriteDecodedThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)691 void WriteDecodedThumbnailStream(FPDF_PAGE page,
692                                  const char* pdf_name,
693                                  int page_num) {
694   char filename[256];
695   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
696                             ThumbnailDecodeType::kDecodedStream)) {
697     return;
698   }
699 
700   unsigned long decoded_data_size =
701       FPDFPage_GetDecodedThumbnailData(page, nullptr, 0u);
702 
703   // Only continue if there actually is a thumbnail for this page
704   if (decoded_data_size == 0) {
705     fprintf(stderr, "Failed to get decoded thumbnail for page #%d.\n",
706             page_num + 1);
707     return;
708   }
709 
710   std::vector<uint8_t> thumb_buf(decoded_data_size);
711   if (FPDFPage_GetDecodedThumbnailData(
712           page, thumb_buf.data(), decoded_data_size) != decoded_data_size) {
713     fprintf(stderr, "Failed to get decoded thumbnail data for %s.\n", filename);
714     return;
715   }
716 
717   WriteBufferToFile(thumb_buf.data(), decoded_data_size, filename,
718                     "decoded thumbnail");
719 }
720 
WriteRawThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)721 void WriteRawThumbnailStream(FPDF_PAGE page,
722                              const char* pdf_name,
723                              int page_num) {
724   char filename[256];
725   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
726                             ThumbnailDecodeType::kRawStream)) {
727     return;
728   }
729 
730   unsigned long raw_data_size = FPDFPage_GetRawThumbnailData(page, nullptr, 0u);
731 
732   // Only continue if there actually is a thumbnail for this page
733   if (raw_data_size == 0) {
734     fprintf(stderr, "Failed to get raw thumbnail data for page #%d.\n",
735             page_num + 1);
736     return;
737   }
738 
739   std::vector<uint8_t> thumb_buf(raw_data_size);
740   if (FPDFPage_GetRawThumbnailData(page, thumb_buf.data(), raw_data_size) !=
741       raw_data_size) {
742     fprintf(stderr, "Failed to get raw thumbnail data for %s.\n", filename);
743     return;
744   }
745 
746   WriteBufferToFile(thumb_buf.data(), raw_data_size, filename, "raw thumbnail");
747 }
748 
WriteThumbnail(FPDF_PAGE page,const char * pdf_name,int page_num)749 void WriteThumbnail(FPDF_PAGE page, const char* pdf_name, int page_num) {
750   char filename[256];
751   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
752                             ThumbnailDecodeType::kBitmap)) {
753     return;
754   }
755 
756   ScopedFPDFBitmap thumb_bitmap(FPDFPage_GetThumbnailAsBitmap(page));
757   if (!thumb_bitmap) {
758     fprintf(stderr, "Thumbnail of page #%d has an empty bitmap.\n",
759             page_num + 1);
760     return;
761   }
762 
763   std::vector<uint8_t> png_encoding =
764       EncodeBitmapToPng(std::move(thumb_bitmap));
765   if (png_encoding.empty()) {
766     fprintf(stderr, "Failed to convert thumbnail of page #%d to png.\n",
767             page_num + 1);
768     return;
769   }
770 
771   WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
772                     "thumbnail");
773 }
774