1 /*
2  * Copyright 2015 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/pdf/SkPDFMetadata.h"
9 
10 #include "include/core/SkMilestone.h"
11 #include "include/private/SkTo.h"
12 #include "src/core/SkMD5.h"
13 #include "src/core/SkUtils.h"
14 #include "src/pdf/SkPDFTypes.h"
15 
16 #include <utility>
17 
// Two-level stringification: SKPDF_STRING(X) lets X be macro-expanded first,
// then SKPDF_STRING_IMPL turns the expansion into a string literal.
#define SKPDF_STRING(X) SKPDF_STRING_IMPL(X)
#define SKPDF_STRING_IMPL(X) #X
// Default producer string: "Skia/PDF m" followed by the stringified
// SK_MILESTONE value.
#define SKPDF_PRODUCER "Skia/PDF m" SKPDF_STRING(SK_MILESTONE)
// Key under which SKPDF_PRODUCER is still recorded when the caller supplies
// its own producer string.
#define SKPDF_CUSTOM_PRODUCER_KEY "ProductionLibrary"

// All-zero DateTime.  Creation/modification times equal to this value are
// treated as "not set" and are not written to the output.
static constexpr SkTime::DateTime kZeroTime = {0, 0, 0, 0, 0, 0, 0, 0};
24 
operator !=(const SkTime::DateTime & u,const SkTime::DateTime & v)25 static bool operator!=(const SkTime::DateTime& u, const SkTime::DateTime& v) {
26     return u.fTimeZoneMinutes != v.fTimeZoneMinutes ||
27            u.fYear != v.fYear ||
28            u.fMonth != v.fMonth ||
29            u.fDayOfWeek != v.fDayOfWeek ||
30            u.fDay != v.fDay ||
31            u.fHour != v.fHour ||
32            u.fMinute != v.fMinute ||
33            u.fSecond != v.fSecond;
34 }
35 
pdf_date(const SkTime::DateTime & dt)36 static SkString pdf_date(const SkTime::DateTime& dt) {
37     int timeZoneMinutes = SkToInt(dt.fTimeZoneMinutes);
38     char timezoneSign = timeZoneMinutes >= 0 ? '+' : '-';
39     int timeZoneHours = SkTAbs(timeZoneMinutes) / 60;
40     timeZoneMinutes = SkTAbs(timeZoneMinutes) % 60;
41     return SkStringPrintf(
42             "D:%04u%02u%02u%02u%02u%02u%c%02d'%02d'",
43             static_cast<unsigned>(dt.fYear), static_cast<unsigned>(dt.fMonth),
44             static_cast<unsigned>(dt.fDay), static_cast<unsigned>(dt.fHour),
45             static_cast<unsigned>(dt.fMinute),
46             static_cast<unsigned>(dt.fSecond), timezoneSign, timeZoneHours,
47             timeZoneMinutes);
48 }
49 
// Returns true when every one of the |len| bytes at |src| can be emitted
// unchanged as PDFDocEncoding, i.e. no byte lies in 24..31 or is above 126.
// See Table D.2 (PDFDocEncoding Character Set) in the PDF32000_2008 spec.
static bool utf8_is_pdfdocencoding(const char* src, size_t len) {
    for (size_t i = 0; i < len; ++i) {
        const uint8_t byte = static_cast<uint8_t>(src[i]);
        const bool rejectedControl = byte >= 24 && byte <= 31;
        const bool beyondAscii = byte >= 127;
        if (rejectedControl || beyondAscii) {
            return false;
        }
    }
    return true;
}
61 
// Stores |value| as two bytes, most significant byte first, at *ptr and
// advances *ptr past the two bytes written.
void write_utf16be(char** ptr, uint16_t value) {
    char* dst = *ptr;
    dst[0] = (value >> 8);
    dst[1] = (value & 0xFF);
    *ptr = dst + 2;
}
66 
67 // Please Note:  This "abuses" the SkString, which "should" only hold UTF8.
68 // But the SkString is written as if it is really just a ref-counted array of
69 // chars, so this works, as long as we handle endiness and conversions ourselves.
70 //
71 // Input:  UTF-8
72 // Output  UTF-16-BE
to_utf16be(const char * src,size_t len)73 static SkString to_utf16be(const char* src, size_t len) {
74     SkString ret;
75     const char* const end = src + len;
76     size_t n = 1;  // BOM
77     for (const char* ptr = src; ptr < end;) {
78         SkUnichar u = SkUTF::NextUTF8(&ptr, end);
79         if (u < 0) {
80             break;
81         }
82         n += SkUTF::ToUTF16(u);
83     }
84     ret.resize(2 * n);
85     char* out = ret.writable_str();
86     write_utf16be(&out, 0xFEFF);  // BOM
87     for (const char* ptr = src; ptr < end;) {
88         SkUnichar u = SkUTF::NextUTF8(&ptr, end);
89         if (u < 0) {
90             break;
91         }
92         uint16_t utf16[2];
93         size_t l = SkUTF::ToUTF16(u, utf16);
94         write_utf16be(&out, utf16[0]);
95         if (l == 2) {
96             write_utf16be(&out, utf16[1]);
97         }
98     }
99     SkASSERT(out == ret.writable_str() + 2 * n);
100     return ret;
101 }
102 
103 // Input:  UTF-8
104 // Output  UTF-16-BE OR PDFDocEncoding (if that encoding is identical to ASCII encoding).
105 //
106 // See sections 14.3.3 (Document Information Dictionary) and 7.9.2.2 (Text String Type)
107 // of the PDF32000_2008 spec.
convert(const SkString & s)108 static SkString convert(const SkString& s) {
109     return utf8_is_pdfdocencoding(s.c_str(), s.size()) ? s : to_utf16be(s.c_str(), s.size());
110 }
convert(const char * src)111 static SkString convert(const char* src) {
112     size_t len = strlen(src);
113     return utf8_is_pdfdocencoding(src, len) ? SkString(src, len) : to_utf16be(src, len);
114 }
115 
116 namespace {
117 static const struct {
118     const char* const key;
119     SkString SkPDF::Metadata::*const valuePtr;
120 } gMetadataKeys[] = {
121         {"Title", &SkPDF::Metadata::fTitle},
122         {"Author", &SkPDF::Metadata::fAuthor},
123         {"Subject", &SkPDF::Metadata::fSubject},
124         {"Keywords", &SkPDF::Metadata::fKeywords},
125         {"Creator", &SkPDF::Metadata::fCreator},
126 };
127 }  // namespace
128 
MakeDocumentInformationDict(const SkPDF::Metadata & metadata)129 std::unique_ptr<SkPDFObject> SkPDFMetadata::MakeDocumentInformationDict(
130         const SkPDF::Metadata& metadata) {
131     auto dict = SkPDFMakeDict();
132     for (const auto keyValuePtr : gMetadataKeys) {
133         const SkString& value = metadata.*(keyValuePtr.valuePtr);
134         if (value.size() > 0) {
135             dict->insertString(keyValuePtr.key, convert(value));
136         }
137     }
138     if (metadata.fProducer.isEmpty()) {
139         dict->insertString("Producer", convert(SKPDF_PRODUCER));
140     } else {
141         dict->insertString("Producer", convert(metadata.fProducer));
142         dict->insertString(SKPDF_CUSTOM_PRODUCER_KEY, convert(SKPDF_PRODUCER));
143     }
144     if (metadata.fCreation != kZeroTime) {
145         dict->insertString("CreationDate", pdf_date(metadata.fCreation));
146     }
147     if (metadata.fModified != kZeroTime) {
148         dict->insertString("ModDate", pdf_date(metadata.fModified));
149     }
150     return dict;
151 }
152 
CreateUUID(const SkPDF::Metadata & metadata)153 SkUUID SkPDFMetadata::CreateUUID(const SkPDF::Metadata& metadata) {
154     // The main requirement is for the UUID to be unique; the exact
155     // format of the data that will be hashed is not important.
156     SkMD5 md5;
157     const char uuidNamespace[] = "org.skia.pdf\n";
158     md5.writeText(uuidNamespace);
159     double msec = SkTime::GetMSecs();
160     md5.write(&msec, sizeof(msec));
161     SkTime::DateTime dateTime;
162     SkTime::GetDateTime(&dateTime);
163     md5.write(&dateTime, sizeof(dateTime));
164     md5.write(&metadata.fCreation, sizeof(metadata.fCreation));
165     md5.write(&metadata.fModified, sizeof(metadata.fModified));
166 
167     for (const auto keyValuePtr : gMetadataKeys) {
168         md5.writeText(keyValuePtr.key);
169         md5.write("\037", 1);
170         const SkString& value = metadata.*(keyValuePtr.valuePtr);
171         md5.write(value.c_str(), value.size());
172         md5.write("\036", 1);
173     }
174     SkMD5::Digest digest = md5.finish();
175     // See RFC 4122, page 6-7.
176     digest.data[6] = (digest.data[6] & 0x0F) | 0x30;
177     digest.data[8] = (digest.data[6] & 0x3F) | 0x80;
178     static_assert(sizeof(digest) == sizeof(SkUUID), "uuid_size");
179     SkUUID uuid;
180     memcpy(&uuid, &digest, sizeof(digest));
181     return uuid;
182 }
183 
MakePdfId(const SkUUID & doc,const SkUUID & instance)184 std::unique_ptr<SkPDFObject> SkPDFMetadata::MakePdfId(const SkUUID& doc,
185                                             const SkUUID& instance) {
186     // /ID [ <81b14aafa313db63dbd6f981e49f94f4>
187     //       <81b14aafa313db63dbd6f981e49f94f4> ]
188     auto array = SkPDFMakeArray();
189     static_assert(sizeof(SkUUID) == 16, "uuid_size");
190     array->appendString(
191             SkString(reinterpret_cast<const char*>(&doc), sizeof(SkUUID)));
192     array->appendString(
193             SkString(reinterpret_cast<const char*>(&instance), sizeof(SkUUID)));
194     return array;
195 }
196 
197 // Convert a block of memory to hexadecimal.  Input and output pointers will be
198 // moved to end of the range.
hexify(const uint8_t ** inputPtr,char ** outputPtr,int count)199 static void hexify(const uint8_t** inputPtr, char** outputPtr, int count) {
200     SkASSERT(inputPtr && *inputPtr);
201     SkASSERT(outputPtr && *outputPtr);
202     while (count-- > 0) {
203         uint8_t value = *(*inputPtr)++;
204         *(*outputPtr)++ = SkHexadecimalDigits::gLower[value >> 4];
205         *(*outputPtr)++ = SkHexadecimalDigits::gLower[value & 0xF];
206     }
207 }
208 
uuid_to_string(const SkUUID & uuid)209 static SkString uuid_to_string(const SkUUID& uuid) {
210     //  8-4-4-4-12
211     char buffer[36];  // [32 + 4]
212     char* ptr = buffer;
213     const uint8_t* data = uuid.fData;
214     hexify(&data, &ptr, 4);
215     *ptr++ = '-';
216     hexify(&data, &ptr, 2);
217     *ptr++ = '-';
218     hexify(&data, &ptr, 2);
219     *ptr++ = '-';
220     hexify(&data, &ptr, 2);
221     *ptr++ = '-';
222     hexify(&data, &ptr, 6);
223     SkASSERT(ptr == buffer + 36);
224     SkASSERT(data == uuid.fData + 16);
225     return SkString(buffer, 36);
226 }
227 
228 namespace {
229 class PDFXMLObject final : public SkPDFObject {
230 public:
PDFXMLObject(SkString xml)231     PDFXMLObject(SkString xml) : fXML(std::move(xml)) {}
emitObject(SkWStream * stream) const232     void emitObject(SkWStream* stream) const override {
233         SkPDFDict dict("Metadata");
234         dict.insertName("Subtype", "XML");
235         dict.insertInt("Length", fXML.size());
236         dict.emitObject(stream);
237         static const char streamBegin[] = " stream\n";
238         stream->writeText(streamBegin);
239         // Do not compress this.  The standard requires that a
240         // program that does not understand PDF can grep for
241         // "<?xpacket" and extract the entire XML.
242         stream->write(fXML.c_str(), fXML.size());
243         static const char streamEnd[] = "\nendstream";
244         stream->writeText(streamEnd);
245     }
246 
247 private:
248     const SkString fXML;
249 };
250 }  // namespace
251 
count_xml_escape_size(const SkString & input)252 static int count_xml_escape_size(const SkString& input) {
253     int extra = 0;
254     for (size_t i = 0; i < input.size(); ++i) {
255         if (input[i] == '&') {
256             extra += 4;  // strlen("&amp;") - strlen("&")
257         } else if (input[i] == '<') {
258             extra += 3;  // strlen("&lt;") - strlen("<")
259         }
260     }
261     return extra;
262 }
263 
// Escapes |input| for use as XML element content and optionally wraps it:
// the result is |before| + escaped text + |after|.
//
//  - "&" --> "&amp;" and "<" --> "&lt;"; nothing else is rewritten.
//  - The text is assumed to be UTF-8.
//  - All strings are XML content, not attribute values.
//  - An empty |input| is returned as-is, without |before| or |after|.
const SkString escape_xml(const SkString& input,
                          const char* before = nullptr,
                          const char* after = nullptr) {
    const size_t inputLen = input.size();
    if (inputLen == 0) {
        return input;
    }

    static const char kAmp[] = "&amp;";
    static const char kLt[] = "&lt;";
    const size_t prefixLen = before ? strlen(before) : 0;
    const size_t suffixLen = after ? strlen(after) : 0;
    const int growth = count_xml_escape_size(input);

    // Size the result exactly up front so it can be filled in a single pass.
    SkString output(inputLen + growth + prefixLen + suffixLen);
    char* out = output.writable_str();
    // Copies exactly |n| bytes (no terminator) and advances the write cursor.
    auto emit = [&out](const char* text, size_t n) {
        memcpy(out, text, n);
        out += n;
    };

    if (before) {
        emit(before, prefixLen);
    }
    for (size_t idx = 0; idx < inputLen; ++idx) {
        const char c = input[idx];
        if (c == '&') {
            emit(kAmp, strlen(kAmp));
        } else if (c == '<') {
            emit(kLt, strlen(kLt));
        } else {
            *out++ = c;
        }
    }
    if (after) {
        emit(after, suffixLen);
    }
    // Validate that we haven't written outside of our string.
    SkASSERT(out == &output.writable_str()[output.size()]);
    *out = '\0';
    return output;
}
304 
MakeXMPObject(const SkPDF::Metadata & metadata,const SkUUID & doc,const SkUUID & instance,SkPDFDocument * docPtr)305 SkPDFIndirectReference SkPDFMetadata::MakeXMPObject(
306         const SkPDF::Metadata& metadata,
307         const SkUUID& doc,
308         const SkUUID& instance,
309         SkPDFDocument* docPtr) {
310     static const char templateString[] =
311             "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
312             "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n"
313             " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, "
314             "2012/08/23-13:03:03\">\n"
315             "<rdf:RDF "
316             "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
317             "<rdf:Description rdf:about=\"\"\n"
318             " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n"
319             " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
320             " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n"
321             " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n"
322             " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n"
323             "<pdfaid:part>2</pdfaid:part>\n"
324             "<pdfaid:conformance>B</pdfaid:conformance>\n"
325             "%s"  // ModifyDate
326             "%s"  // CreateDate
327             "%s"  // xmp:CreatorTool
328             "<dc:format>application/pdf</dc:format>\n"
329             "%s"  // dc:title
330             "%s"  // dc:description
331             "%s"  // author
332             "%s"  // keywords
333             "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n"
334             "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n"
335             "%s"  // pdf:Producer
336             "%s"  // pdf:Keywords
337             "</rdf:Description>\n"
338             "</rdf:RDF>\n"
339             "</x:xmpmeta>\n"  // Note:  the standard suggests 4k of padding.
340             "<?xpacket end=\"w\"?>\n";
341 
342     SkString creationDate;
343     SkString modificationDate;
344     if (metadata.fCreation != kZeroTime) {
345         SkString tmp;
346         metadata.fCreation.toISO8601(&tmp);
347         SkASSERT(0 == count_xml_escape_size(tmp));
348         // YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape
349         creationDate = SkStringPrintf("<xmp:CreateDate>%s</xmp:CreateDate>\n",
350                                       tmp.c_str());
351     }
352     if (metadata.fModified != kZeroTime) {
353         SkString tmp;
354         metadata.fModified.toISO8601(&tmp);
355         SkASSERT(0 == count_xml_escape_size(tmp));
356         modificationDate = SkStringPrintf(
357                 "<xmp:ModifyDate>%s</xmp:ModifyDate>\n", tmp.c_str());
358     }
359     SkString title =
360             escape_xml(metadata.fTitle,
361                        "<dc:title><rdf:Alt><rdf:li xml:lang=\"x-default\">",
362                        "</rdf:li></rdf:Alt></dc:title>\n");
363     SkString author =
364             escape_xml(metadata.fAuthor, "<dc:creator><rdf:Bag><rdf:li>",
365                        "</rdf:li></rdf:Bag></dc:creator>\n");
366     // TODO: in theory, XMP can support multiple authors.  Split on a delimiter?
367     SkString subject = escape_xml(
368             metadata.fSubject,
369             "<dc:description><rdf:Alt><rdf:li xml:lang=\"x-default\">",
370             "</rdf:li></rdf:Alt></dc:description>\n");
371     SkString keywords1 =
372             escape_xml(metadata.fKeywords, "<dc:subject><rdf:Bag><rdf:li>",
373                        "</rdf:li></rdf:Bag></dc:subject>\n");
374     SkString keywords2 = escape_xml(metadata.fKeywords, "<pdf:Keywords>",
375                                     "</pdf:Keywords>\n");
376     // TODO: in theory, keywords can be a list too.
377 
378     SkString producer("<pdf:Producer>" SKPDF_PRODUCER "</pdf:Producer>\n");
379     if (!metadata.fProducer.isEmpty()) {
380         // TODO: register a developer prefix to make
381         // <skia:SKPDF_CUSTOM_PRODUCER_KEY> a real XML tag.
382         producer = escape_xml(
383                 metadata.fProducer, "<pdf:Producer>",
384                 "</pdf:Producer>\n<!-- <skia:" SKPDF_CUSTOM_PRODUCER_KEY ">"
385                 SKPDF_PRODUCER "</skia:" SKPDF_CUSTOM_PRODUCER_KEY "> -->\n");
386     }
387 
388     SkString creator = escape_xml(metadata.fCreator, "<xmp:CreatorTool>",
389                                   "</xmp:CreatorTool>\n");
390     SkString documentID = uuid_to_string(doc);  // no need to escape
391     SkASSERT(0 == count_xml_escape_size(documentID));
392     SkString instanceID = uuid_to_string(instance);
393     SkASSERT(0 == count_xml_escape_size(instanceID));
394 
395 
396     auto value = SkStringPrintf(
397             templateString, modificationDate.c_str(), creationDate.c_str(),
398             creator.c_str(), title.c_str(), subject.c_str(), author.c_str(),
399             keywords1.c_str(), documentID.c_str(), instanceID.c_str(),
400             producer.c_str(), keywords2.c_str());
401 
402     std::unique_ptr<SkPDFDict> dict = SkPDFMakeDict("Metadata");
403     dict->insertName("Subtype", "XML");
404     return SkPDFStreamOut(std::move(dict),
405                           SkMemoryStream::MakeCopy(value.c_str(), value.size()),
406                           docPtr, false);
407 }
408 
// Remove the helper macros defined at the top of this file.
#undef SKPDF_CUSTOM_PRODUCER_KEY
#undef SKPDF_PRODUCER
#undef SKPDF_STRING
#undef SKPDF_STRING_IMPL
413