1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4
5
6 #include <iostream>
7
8 #include <boost/container/flat_map.hpp>
9
10 #include <OpenImageIO/fmath.h>
11 #include <OpenImageIO/imageio.h>
12 #include <OpenImageIO/strutil.h>
13 #include <OpenImageIO/tiffutils.h>
14 #include <OpenImageIO/timer.h>
15
16 extern "C" {
17 #include "tiff.h"
18 }
19
20 #if USE_EXTERNAL_PUGIXML
21 # include <pugixml.hpp>
22 #else
23 # include <OpenImageIO/detail/pugixml/pugixml.hpp>
24 #endif
25
26 #define DEBUG_XMP_READ 0
27 #define DEBUG_XMP_WRITE 0
28
29 #define MY_ENCODING "ISO-8859-1"
30
31 OIIO_NAMESPACE_BEGIN
32
33 namespace { // anonymous
34
35
// Flags describing any special processing an XMP tag needs. Every flag
// occupies its own bit, so several can be combined with '|'.
enum XMPspecial {
    NothingSpecial = 0,
    Rational       = 1 << 0,  // Must be expressed in the form A/B
    DateConversion = 1 << 1,  // A date; may need conversion to canonical form
    TiffRedundant  = 1 << 2,  // Duplicates something in the normal TIFF tags
    ExifRedundant  = 1 << 3,  // Duplicates something included in Exif
    Suppress       = 1 << 4,  // Explicitly kept out of the XMP
    IsList         = 1 << 5,  // Becomes a semicolon-separated list
    IsSeq          = 1 << 6,  // Like IsList, but the order matters
    IsBool         = 1 << 7   // Should be output as True/False
};
49
50 struct XMPtag {
51 const char* xmpname; // XMP name
52 const char* oiioname; // Attribute name we use
53 TypeDesc oiiotype; // Type we use
54 int special; // Special handling
55
XMPtag__anon1103262b0111::XMPtag56 XMPtag(const char* xname, const char* oname, TypeDesc type = TypeUnknown,
57 int spec = 0)
58 : xmpname(xname)
59 , oiioname(oname)
60 , oiiotype(type)
61 , special(spec)
62 {
63 }
64 };
65
// Table of the XMP properties we know about. Each row gives the XMP name,
// the attribute name used on the OIIO side, the type of that attribute, and
// any XMPspecial flags. A row whose OIIO name is empty is one that
// add_attrib() deliberately ignores. The final row, with null names, marks
// the end of the table (XMPtagMap stops when it reaches a null xmpname).
static XMPtag xmptag[] = {
    // clang-format off
    { "photoshop:AuthorsPosition", "IPTC:AuthorsPosition", TypeDesc::STRING, 0 },
    { "photoshop:CaptionWriter", "IPTC:CaptionWriter", TypeDesc::STRING, 0 },
    { "photoshop:Category", "IPTC:Category", TypeDesc::STRING, 0 },
    { "photoshop:City", "IPTC:City", TypeDesc::STRING, 0 },
    { "photoshop:Country", "IPTC:Country", TypeDesc::STRING, 0 },
    { "photoshop:Credit", "IPTC:Provider", TypeDesc::STRING, 0 },
    { "photoshop:DateCreated", "DateTime", TypeDesc::STRING, DateConversion|TiffRedundant },
    { "photoshop:Headline", "IPTC:Headline", TypeDesc::STRING, 0 },
    { "photoshop:History", "ImageHistory", TypeDesc::STRING, 0 },
    { "photoshop:Instructions", "IPTC:Instructions", TypeDesc::STRING, 0 },
    { "photoshop:Source", "IPTC:Source", TypeDesc::STRING, 0 },
    { "photoshop:State", "IPTC:State", TypeDesc::STRING, 0 },
    { "photoshop:SupplementalCategories", "IPTC:SupplementalCategories", TypeDesc::STRING, IsList|Suppress }, // FIXME -- un-suppress when we have it working
    { "photoshop:TransmissionReference", "IPTC:TransmissionReference", TypeDesc::STRING, 0 },
    { "photoshop:Urgency", "photoshop:Urgency", TypeDesc::INT, 0 },

    { "tiff:Compression", "tiff:Compression", TypeDesc::INT, TiffRedundant },
    { "tiff:PlanarConfiguration", "tiff:PlanarConfiguration", TypeDesc::INT, TiffRedundant },
    { "tiff:PhotometricInterpretation", "tiff:PhotometricInterpretation", TypeDesc::INT, TiffRedundant },
    { "tiff:subfiletype", "tiff:subfiletype", TypeDesc::INT, TiffRedundant },
    { "tiff:Orientation", "Orientation", TypeDesc::INT, TiffRedundant },
    { "tiff:XResolution", "XResolution", TypeDesc::FLOAT, Rational|TiffRedundant },
    { "tiff:YResolution", "YResolution", TypeDesc::FLOAT, Rational|TiffRedundant },
    { "tiff:ResolutionUnit", "ResolutionUnit", TypeDesc::INT, TiffRedundant },
    { "tiff:Artist", "Artist", TypeDesc::STRING, 0 },
    { "tiff:Copyright", "Copyright", TypeDesc::STRING, 0 },
    { "tiff:DateTime", "DateTime", TypeDesc::STRING, DateConversion },
    { "tiff:ImageDescription", "ImageDescription", TypeDesc::STRING, 0 },
    { "tiff:Make", "Make", TypeDesc::STRING, 0 },
    { "tiff:Model", "Model", TypeDesc::STRING, 0 },
    { "tiff:Software", "Software", TypeDesc::STRING, TiffRedundant },

    { "exif:ColorSpace", "Exif:ColorSpace", TypeDesc::INT, ExifRedundant },
    { "exif:PixelXDimension", "", TypeDesc::INT, ExifRedundant|TiffRedundant},
    { "exif:PixelYDimension", "", TypeDesc::INT, ExifRedundant|TiffRedundant },
    { "exifEX:PhotographicSensitivity", "Exif:ISOSpeedRatings", TypeDesc::INT, ExifRedundant },

    { "xmp:CreateDate", "DateTime", TypeDesc::STRING, DateConversion|TiffRedundant },
    { "xmp:CreatorTool", "Software", TypeDesc::STRING, TiffRedundant },
    { "xmp:Label", "IPTC:Label", TypeDesc::STRING, 0 },
    { "xmp:MetadataDate", "IPTC:MetadataDate", TypeDesc::STRING, DateConversion },
    { "xmp:ModifyDate", "IPTC:ModifyDate", TypeDesc::STRING, DateConversion },
    { "xmp:Rating", "IPTC:Rating", TypeDesc::INT, 0 },

    { "xmpMM:DocumentID", "IPTC:DocumentID", TypeDesc::STRING, 0 },
    { "xmpMM:History", "ImageHistory", TypeDesc::STRING, IsSeq|Suppress },
    { "xmpMM:InstanceID", "IPTC:InstanceID", TypeDesc::STRING, 0 },
    { "xmpMM:OriginalDocumentID", "IPTC:OriginalDocumentID", TypeDesc::STRING, 0 },

    { "xmpRights:Marked", "IPTC:CopyrightStatus", TypeDesc::INT, IsBool },
    { "xmpRights:WebStatement", "IPTC:CopyrightInfoURL", TypeDesc::STRING, 0 },
    { "xmpRights:UsageTerms", "IPTC:RightsUsageTerms", TypeDesc::STRING, 0 },

    { "dc:format", "", TypeDesc::STRING, TiffRedundant|Suppress },
    { "dc:Description", "ImageDescription", TypeDesc::STRING, TiffRedundant },
    { "dc:Creator", "Artist", TypeDesc::STRING, TiffRedundant },
    { "dc:Rights", "Copyright", TypeDesc::STRING, TiffRedundant },
    { "dc:title", "IPTC:ObjectName", TypeDesc::STRING, 0 },
    { "dc:subject", "Keywords", TypeDesc::STRING, IsList },
    { "dc:keywords", "Keywords", TypeDesc::STRING, IsList },

    { "Iptc4xmpCore:IntellectualGenre", "IPTC:IntellectualGenre", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CountryCode", "IPTC:CountryCode", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CreatorContactInfo", "IPTC:CreatorContactInfo", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:ContactInfoDetails", "IPTC:Contact", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrExtadr", "IPTC:ContactInfoAddress", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrCity", "IPTC:ContactInfoCity", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrRegion", "IPTC:ContactInfoState", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrPcode", "IPTC:ContactInfoPostalCode", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrCtry", "IPTC:ContactInfoCountry", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiEmailWork", "IPTC:ContactInfoEmail", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiTelWork", "IPTC:ContactInfoPhone", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiUrlWork", "IPTC:ContactInfoURL", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:Location", "IPTC:Sublocation", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:SubjectCode", "IPTC:SubjectCode", TypeDesc::STRING, IsList },
    { "Iptc4xmpCore:Scene", "IPTC:SceneCode", TypeDesc::STRING, IsList },
    { "Iptc4xmpExt:PersonInImage", "IPTC:PersonInImage", TypeDesc::STRING, IsList },

    { "aux:Firmware", "aux:Firmware", TypeDesc::STRING, 0},

    { "crs:AutoBrightness", "crs:AutoBrightness" , TypeDesc::INT, IsBool },
    { "crs:AutoContrast", "crs:AutoContrast" , TypeDesc::INT, IsBool },
    { "crs:AutoExposure", "crs:AutoExposure" , TypeDesc::INT, IsBool },
    { "crs:AutoShadows", "crs:AutoShadows" , TypeDesc::INT, IsBool },
    { "crs:BlueHue", "crs:BlueHue" , TypeDesc::INT, 0 },
    { "crs:BlueSaturation", "crs:BlueSaturation" , TypeDesc::INT, 0 },
    { "crs:Brightness", "crs:Brightness" , TypeDesc::INT, 0 },
    { "crs:CameraProfile", "crs:CameraProfile" , TypeDesc::STRING, 0 },
    { "crs:ChromaticAberrationB", "crs:ChromaticAberrationB" , TypeDesc::INT, 0 },
    { "crs:ChromaticAberrationR", "crs:ChromaticAberrationR" , TypeDesc::INT, 0 },
    { "crs:ColorNoiseReduction", "crs:ColorNoiseReduction" , TypeDesc::INT, 0 },
    { "crs:Contrast", "crs:Contrast", TypeDesc::INT, 0 },
    { "crs:CropTop", "crs:CropTop", TypeDesc::FLOAT, 0 },
    { "crs:CropLeft", "crs:CropLeft", TypeDesc::FLOAT, 0 },
    { "crs:CropBottom", "crs:CropBottom", TypeDesc::FLOAT, 0 },
    { "crs:CropRight", "crs:CropRight", TypeDesc::FLOAT, 0 },
    { "crs:CropAngle", "crs:CropAngle", TypeDesc::FLOAT, 0 },
    { "crs:CropWidth", "crs:CropWidth", TypeDesc::FLOAT, 0 },
    { "crs:CropHeight", "crs:CropHeight", TypeDesc::FLOAT, 0 },
    { "crs:CropUnits", "crs:CropUnits", TypeDesc::INT, 0 },
    { "crs:Exposure", "crs:Exposure", TypeDesc::FLOAT, 0 },
    { "crs:GreenHue", "crs:GreenHue", TypeDesc::INT, 0 },
    { "crs:GreenSaturation", "crs:GreenSaturation", TypeDesc::INT, 0 },
    { "crs:HasCrop", "crs:HasCrop", TypeDesc::INT, IsBool },
    { "crs:HasSettings", "crs:HasSettings", TypeDesc::INT, IsBool },
    { "crs:LuminanceSmoothing", "crs:LuminanceSmoothing", TypeDesc::INT, 0 },
    { "crs:RawFileName", "crs:RawFileName", TypeDesc::STRING, 0 },
    { "crs:RedHue", "crs:RedHue", TypeDesc::INT, 0 },
    { "crs:RedSaturation", "crs:RedSaturation", TypeDesc::INT, 0 },
    { "crs:Saturation", "crs:Saturation", TypeDesc::INT, 0 },
    { "crs:Shadows", "crs:Shadows", TypeDesc::INT, 0 },
    { "crs:ShadowTint", "crs:ShadowTint", TypeDesc::INT, 0 },
    { "crs:Sharpness", "crs:Sharpness", TypeDesc::INT, 0 },
    { "crs:Temperature", "crs:Temperature", TypeDesc::INT, 0 },
    { "crs:Tint", "crs:Tint", TypeDesc::INT, 0 },
    { "crs:ToneCurve", "crs:ToneCurve", TypeDesc::STRING, 0 },
    { "crs:ToneCurveName", "crs:ToneCurveName", TypeDesc::STRING, 0 },
    { "crs:Version", "crs:Version", TypeDesc::STRING, 0 },
    { "crs:VignetteAmount", "crs:VignetteAmount", TypeDesc::INT, 0 },
    { "crs:VignetteMidpoint", "crs:VignetteMidpoint", TypeDesc::INT, 0 },
    { "crs:WhiteBalance", "crs:WhiteBalance", TypeDesc::STRING, 0 },

    { "GPano:UsePanoramaViewer", "GPano:UsePanoramaViewer", TypeDesc::INT, IsBool },
    { "GPano:CaptureSoftware", "GPano:CaptureSoftware", TypeDesc::STRING, 0 },
    { "GPano:StitchingSoftware", "GPano:StitchingSoftware", TypeDesc::STRING, 0 },
    { "GPano:ProjectionType", "GPano:ProjectionType", TypeDesc::STRING, 0 },
    { "GPano:PoseHeadingDegrees", "GPano:PoseHeadingDegrees", TypeDesc::FLOAT, 0 },
    { "GPano:PosePitchDegrees", "GPano:PosePitchDegrees", TypeDesc::FLOAT, 0 },
    { "GPano:PoseRollDegrees", "GPano:PoseRollDegrees", TypeDesc::FLOAT, 0 },
    { "GPano:InitialViewHeadingDegrees", "GPano:InitialViewHeadingDegrees", TypeDesc::INT, 0 },
    { "GPano:InitialViewPitchDegrees", "GPano:InitialViewPitchDegrees", TypeDesc::INT, 0 },
    { "GPano:InitialViewRollDegrees", "GPano:InitialViewRollDegrees", TypeDesc::INT, 0 },
    { "GPano:InitialHorizontalFOVDegrees", "GPano:InitialHorizontalFOVDegrees", TypeDesc::FLOAT, 0 },
    { "GPano:FirstPhotoDate", "GPano:FirstPhotoDate", TypeDesc::STRING, DateConversion },
    { "GPano:LastPhotoDate", "GPano:LastPhotoDate", TypeDesc::STRING, DateConversion },
    { "GPano:SourcePhotosCount", "GPano:SourcePhotosCount", TypeDesc::INT, 0 },
    { "GPano:ExposureLockUsed", "GPano:ExposureLockUsed", TypeDesc::INT, IsBool },
    { "GPano:CroppedAreaImageWidthPixels", "GPano:CroppedAreaImageWidthPixels", TypeDesc::INT, 0 },
    { "GPano:CroppedAreaImageHeightPixels", "GPano:CroppedAreaImageHeightPixels", TypeDesc::INT, 0 },
    { "GPano:FullPanoWidthPixels", "GPano:FullPanoWidthPixels", TypeDesc::INT, 0 },
    { "GPano:FullPanoHeightPixels", "GPano:FullPanoHeightPixels", TypeDesc::INT, 0 },
    { "GPano:CroppedAreaLeftPixels", "GPano:CroppedAreaLeftPixels", TypeDesc::INT, 0 },
    { "GPano:CroppedAreaTopPixels", "GPano:CroppedAreaTopPixels", TypeDesc::INT, 0 },
    { "GPano:InitialCameraDolly", "GPano:InitialCameraDolly", TypeDesc::FLOAT, 0 },
    { "GPano:LargestValidInteriorRectWidth", "GPano:LargestValidInteriorRectWidth", TypeDesc::INT, 0 },
    { "GPano:LargestValidInteriorRectHeight", "GPano:LargestValidInteriorRectHeight", TypeDesc::INT, 0 },
    { "GPano:LargestValidInteriorRectTop", "GPano:LargestValidInteriorRectTop", TypeDesc::INT, 0 },
    { "GPano:LargestValidInteriorRectLeft", "GPano:LargestValidInteriorRectLeft", TypeDesc::INT, 0 },

    { "rdf:li", "" }, // ignore these strays
    { nullptr, nullptr }
    // clang-format on
};
221
222
223
224 class XMPtagMap {
225 typedef boost::container::flat_map<std::string, const XMPtag*> tagmap_t;
226 // Key is lower case so it's effectively case-insensitive
227 public:
XMPtagMap(const XMPtag * tag_table)228 XMPtagMap(const XMPtag* tag_table)
229 {
230 for (const XMPtag* t = &tag_table[0]; t->xmpname; ++t) {
231 std::string lower(t->xmpname);
232 Strutil::to_lower(lower);
233 m_tagmap[lower] = t;
234 }
235 }
236
find(string_view name) const237 const XMPtag* find(string_view name) const
238 {
239 std::string lower = name;
240 Strutil::to_lower(lower);
241 tagmap_t::const_iterator i = m_tagmap.find(lower);
242 return i == m_tagmap.end() ? nullptr : i->second;
243 }
244
245 private:
246 tagmap_t m_tagmap;
247 };
248
249 static XMPtagMap&
xmp_tagmap_ref()250 xmp_tagmap_ref()
251 {
252 static XMPtagMap T(xmptag);
253 return T;
254 }
255
256
257
258 // Does it look like the string representation of a rational value?
259 inline bool
string_is_rational(string_view s)260 string_is_rational(string_view s)
261 {
262 int n;
263 return Strutil::parse_int(s, n) && Strutil::parse_char(s, '/')
264 && Strutil::string_is_int(s);
265 }
266
267
268
269 inline bool
parse_rational(string_view s,int & n,int & d)270 parse_rational(string_view s, int& n, int& d)
271 {
272 return Strutil::parse_int(s, n) && Strutil::parse_char(s, '/')
273 && Strutil::parse_int(s, d);
274 }
275
276
277
278 // Utility: add an attribute to the spec with the given xml name and
279 // value. Search for it in xmptag, and if found that will tell us what
280 // the type is supposed to be, as well as any special handling. If not
281 // found in the table, add it as a string and hope for the best.
282 // Return value is the size of the resulting attribute (can be used to
283 // catch runaway or corrupt XML).
284 static size_t
add_attrib(ImageSpec & spec,string_view xmlname,string_view xmlvalue,bool attribIsSeq=false)285 add_attrib(ImageSpec& spec, string_view xmlname, string_view xmlvalue,
286 bool attribIsSeq = false)
287 {
288 #if DEBUG_XMP_READ
289 std::cerr << "add_attrib " << xmlname << ": '" << xmlvalue << "'\n";
290 #endif
291 std::string oiioname = xmlname;
292 TypeDesc oiiotype;
293 int special = NothingSpecial;
294
295 // See if it's in the xmp table, which will tell us something about the
296 // proper type (everything in the xml itself just looks like a string).
297 if (const XMPtag* xt = xmp_tagmap_ref().find(xmlname)) {
298 if (!xt->oiioname || !xt->oiioname[0])
299 return 0; // ignore it purposefully
300 // Found
301 oiioname = xt->oiioname;
302 oiiotype = xt->oiiotype;
303 special = xt->special;
304 }
305
306 // Also try looking it up to see if it's a known exif tag.
307 int tag = -1, tifftype = -1, count = 0;
308 if (Strutil::istarts_with(xmlname, "Exif:")
309 && (exif_tag_lookup(xmlname, tag, tifftype, count)
310 || exif_tag_lookup(xmlname.substr(5), tag, tifftype, count))) {
311 // It's a known Exif name
312 if (tifftype == TIFF_SHORT && count == 1)
313 oiiotype = TypeDesc::UINT;
314 else if (tifftype == TIFF_LONG && count == 1)
315 oiiotype = TypeDesc::UINT;
316 else if ((tifftype == TIFF_RATIONAL || tifftype == TIFF_SRATIONAL)
317 && count == 1) {
318 oiiotype = TypeDesc::FLOAT;
319 special = Rational;
320 } else if (tifftype == TIFF_ASCII)
321 oiiotype = TypeDesc::STRING;
322 else if (tifftype == TIFF_BYTE && count == 1)
323 oiiotype = TypeDesc::INT;
324 else if (tifftype == TIFF_NOTYPE)
325 return 0; // skip
326 }
327
328 // Guess the type if unknown
329 if (oiiotype == TypeUnknown) {
330 if (Strutil::string_is_int(xmlvalue))
331 oiiotype = TypeInt;
332 else if (Strutil::string_is_float(xmlvalue))
333 oiiotype = TypeFloat;
334 else
335 oiiotype = TypeString;
336 if (attribIsSeq)
337 special |= IsSeq;
338 }
339
340 if (oiiotype == TypeDesc::STRING) {
341 std::string val;
342 if (special & (IsList | IsSeq)) {
343 // Special case -- append it to a list
344 std::vector<string_view> items;
345 ParamValue* p = spec.find_attribute(oiioname, TypeDesc::STRING);
346 bool dup = false;
347 if (p) {
348 items = Strutil::splitsv(*(const char**)p->data(), ";");
349 for (auto& item : items) {
350 item = Strutil::strip(item);
351 dup |= (item == xmlvalue);
352 }
353 dup |= (xmlvalue == (*(const char**)p->data()));
354 }
355 if (!dup)
356 items.emplace_back(xmlvalue);
357 val = Strutil::join(items, "; ");
358 } else {
359 val = xmlvalue;
360 }
361 spec.attribute(oiioname, val);
362 return val.size();
363 } else if (oiiotype == TypeRational || string_is_rational(xmlname)) {
364 int val[2];
365 if (parse_rational(xmlvalue, val[0], val[1]))
366 spec.attribute(xmlname, TypeRational, &val[0]);
367 return sizeof(val);
368 } else if (oiiotype == TypeDesc::INT) {
369 std::vector<int> vals;
370 if ((special & (IsList | IsSeq))
371 && spec.extra_attribs.contains(xmlname))
372 vals = spec.extra_attribs[xmlname].as_vec<int>();
373 if (special & IsBool)
374 vals.push_back((int)Strutil::iequals(xmlvalue, "true"));
375 else // ordinary int
376 vals.push_back(Strutil::stoi(xmlvalue));
377 TypeDesc t = oiiotype;
378 if (vals.size() > 1)
379 t.arraylen = vals.size();
380 spec.attribute(oiioname, t, vals.data());
381 return vals.size() * sizeof(int);
382 } else if (oiiotype == TypeDesc::UINT) {
383 spec.attribute(oiioname, Strutil::from_string<unsigned int>(xmlvalue));
384 return sizeof(unsigned int);
385 } else if (oiiotype == TypeDesc::FLOAT) {
386 std::vector<float> vals;
387 if ((special & (IsList | IsSeq))
388 && spec.extra_attribs.contains(xmlname))
389 vals = spec.extra_attribs[xmlname].as_vec<float>();
390 vals.push_back(Strutil::stof(xmlvalue));
391 TypeDesc t = oiiotype;
392 if (vals.size() > 1)
393 t.arraylen = vals.size();
394 spec.attribute(oiioname, t, vals.data());
395 return vals.size() * sizeof(float);
396 }
397 #if (!defined(NDEBUG) || DEBUG_XMP_READ)
398 else {
399 std::cerr << "iptc xml add_attrib unknown type " << xmlname << ' '
400 << oiiotype.c_str() << "\n";
401 }
402 #endif
403
404 // Catch-all for unrecognized things -- just add them as a string!
405 spec.attribute(xmlname, xmlvalue);
406 return xmlvalue.size();
407 }
408
409
410
// Utility: starting at position pos, find the first piece of str that
// begins with startmarker and ends with endmarker. On success, return true
// with startpos holding the index where startmarker begins and endpos the
// index just past the end of endmarker. Return false if either marker
// cannot be found.
static bool
extract_middle(string_view str, size_t pos, string_view startmarker,
               string_view endmarker, size_t& startpos, size_t& endpos)
{
    const size_t notfound = std::string::npos;

    startpos = str.find(startmarker, pos);
    if (startpos != notfound) {
        endpos = str.find(endmarker, startpos);
        if (endpos != notfound) {
            endpos += endmarker.size();  // step past the end marker
            return true;
        }
    }
    return false;  // one of the markers is missing
}
428
429
430 // Decode one XMP node and its children.
431 // Return value is the size of the resulting attribute (can be used to
432 // catch runaway or corrupt XML).
433 static size_t
decode_xmp_node(pugi::xml_node node,ImageSpec & spec,int level=1,const char * parentname=NULL,bool isList=false)434 decode_xmp_node(pugi::xml_node node, ImageSpec& spec, int level = 1,
435 const char* parentname = NULL, bool isList = false)
436 {
437 std::string mylist; // will accumulate for list items
438 size_t totalsize = 0;
439 for (; node; node = node.next_sibling()) {
440 #if DEBUG_XMP_READ
441 std::cerr << "Level " << level << " " << node.name() << " = "
442 << node.value() << "\n";
443 #endif
444 // First, encode all attributes of this node
445 for (pugi::xml_attribute attr = node.first_attribute(); attr;
446 attr = attr.next_attribute()) {
447 #if DEBUG_XMP_READ
448 std::cerr << " level " << level << " parent "
449 << (parentname ? parentname : "-") << " attr "
450 << attr.name() << ' ' << attr.value() << "\n";
451 #endif
452 if (Strutil::istarts_with(attr.name(), "xml:")
453 || Strutil::istarts_with(attr.name(), "xmlns:"))
454 continue; // xml attributes aren't image metadata
455 if (attr.name()[0] && attr.value()[0]) {
456 auto sz = add_attrib(spec, attr.name(), attr.value(), isList);
457 totalsize += sz;
458 // As a guard against runaway lists or corrupt XMP blocks,
459 // don't let attribute lists grow to more than 64KB each.
460 if (sz > 64 * 1024)
461 break;
462 }
463 }
464 if (Strutil::iequals(node.name(), "xmpMM::History")) {
465 // FIXME -- image history is complicated. Come back to it.
466 continue;
467 }
468 if (Strutil::iequals(node.name(), "photoshop:DocumentAncestors")) {
469 // This tag is nothing but trouble. Some images can have
470 // literally MBs in them, placed there by Photoshop as the
471 // result of certain cut-and-paste operations, but serving no
472 // discernable purpose. Just skip it. See also:
473 // https://prepression.blogspot.com/2017/06/metadata-bloat-photoshopdocumentancestors.html
474 // https://feedback.photoshop.com/conversations/photoshop/photoshop-corrupt-ancestors-tag-in-xmp-causing-giant-file-sizes/5f5f45f74b561a3d426ba97f
475 continue;
476 }
477 if (Strutil::iequals(node.name(), "rdf:Bag")
478 || Strutil::iequals(node.name(), "rdf:Seq")
479 || Strutil::iequals(node.name(), "rdf:Alt")
480 || Strutil::iequals(node.name(), "rdf:li")) {
481 // Various kinds of lists. Recurse, pass the parent name
482 // down, and let the child know it's part of a list.
483 totalsize += decode_xmp_node(node.first_child(), spec, level + 1,
484 parentname, true);
485 } else {
486 // Not a list, but it's got children. Recurse.
487 totalsize += decode_xmp_node(node.first_child(), spec, level + 1,
488 node.name(), isList);
489 }
490
491 // If this node has a value but no name, it's definitely part
492 // of a list. Accumulate the list items, separated by semicolons.
493 if (parentname && !node.name()[0] && node.value()[0]) {
494 totalsize -= mylist.size();
495 if (mylist.size())
496 mylist += ";";
497 mylist += node.value();
498 totalsize += mylist.size();
499 }
500 // As a guard against runaway lists or corrupt XMP blocks,
501 // don't let attribute lists grow to more than 64KB each.
502 if (isList && totalsize > 64 * 1024)
503 break;
504 }
505
506 // If we have accumulated a list, turn it into an attribute
507 if (parentname && mylist.size()) {
508 totalsize += add_attrib(spec, parentname, mylist, true);
509 }
510 return totalsize;
511 }
512
513
514 } // anonymous namespace
515
516
517
518 // DEPRECATED(2.1)
519 bool
decode_xmp(const std::string & xml,ImageSpec & spec)520 decode_xmp(const std::string& xml, ImageSpec& spec)
521 {
522 return decode_xmp(string_view(xml), spec);
523 }
524
525
526
527 // DEPRECATED(2.1)
528 bool
decode_xmp(const char * xml,ImageSpec & spec)529 decode_xmp(const char* xml, ImageSpec& spec)
530 {
531 return decode_xmp(string_view(xml), spec);
532 }
533
534
535
536 bool
decode_xmp(cspan<uint8_t> xml,ImageSpec & spec)537 decode_xmp(cspan<uint8_t> xml, ImageSpec& spec)
538 {
539 return decode_xmp(string_view((const char*)xml.data(), xml.size()), spec);
540 }
541
542
543
544 bool
decode_xmp(string_view xml,ImageSpec & spec)545 decode_xmp(string_view xml, ImageSpec& spec)
546 {
547 #if DEBUG_XMP_READ
548 Timer timer;
549 std::cerr << "XMP size is " << xml.size() << "\n";
550 std::cerr << "XMP dump:\n---\n" << xml.substr(0, 4096) << "\n---\n";
551 #endif
552 if (!xml.length())
553 return true;
554 for (size_t startpos = 0, endpos = 0;
555 extract_middle(xml, endpos, "<rdf:Description", "</rdf:Description>",
556 startpos, endpos);) {
557 // Turn that middle section into an XML document
558 string_view rdf = xml.substr(startpos, endpos - startpos); // scooch in
559 #if DEBUG_XMP_READ
560 std::cerr << "RDF is:\n---\n" << rdf.substr(0, 4096) << "\n---\n";
561 #endif
562 pugi::xml_document doc;
563 pugi::xml_parse_result parse_result
564 = doc.load_buffer(rdf.data(), rdf.size(),
565 pugi::parse_default | pugi::parse_fragment);
566 if (!parse_result) {
567 #if DEBUG_XMP_READ
568 std::cerr << "Error parsing XML @" << parse_result.offset << ": "
569 << parse_result.description() << "\n";
570 #endif
571 // Instead of returning early here if there were errors parsing
572 // the XML -- I have noticed that very minor XML malformations
573 // are common in XMP found in files -- hope for the best and
574 // go ahead and assume that maybe it managed to put something
575 // useful in the resulting document.
576 #if 0
577 return true;
578 #endif
579 }
580 // Decode the contents of the XML document (it will recurse)
581 decode_xmp_node(doc.first_child(), spec);
582 }
583 #if DEBUG_XMP_READ
584 std::cerr << "XMP total parse time " << timer() << "\n";
585 #endif
586
587 return true;
588 }
589
590
591
592 // Turn one ParamValue (whose xmp info we know) into a properly
593 // serialized xmp string.
594 static std::string
stringize(const ParamValueList::const_iterator & p,const XMPtag & xmptag)595 stringize(const ParamValueList::const_iterator& p, const XMPtag& xmptag)
596 {
597 if (p->type() == TypeDesc::STRING) {
598 if (xmptag.special & DateConversion) {
599 // FIXME -- convert to yyyy-mm-ddThh:mm:ss.sTZD
600 // return std::string();
601 }
602 return std::string(*(const char**)p->data());
603 } else if (p->type() == TypeDesc::INT) {
604 if (xmptag.special & IsBool)
605 return *(const int*)p->data() ? "True" : "False";
606 else // ordinary int
607 return Strutil::sprintf("%d", *(const int*)p->data());
608 } else if (p->type() == TypeDesc::FLOAT) {
609 if (xmptag.special & Rational) {
610 unsigned int num, den;
611 float_to_rational(*(const float*)p->data(), num, den);
612 return Strutil::sprintf("%d/%d", num, den);
613 } else {
614 return Strutil::sprintf("%g", *(const float*)p->data());
615 }
616 }
617 return std::string();
618 }
619
620
621
622 static void
gather_xmp_attribs(const ImageSpec & spec,std::vector<std::pair<const XMPtag *,std::string>> & list)623 gather_xmp_attribs(const ImageSpec& spec,
624 std::vector<std::pair<const XMPtag*, std::string>>& list)
625 {
626 // Loop over all params...
627 for (ParamValueList::const_iterator p = spec.extra_attribs.begin();
628 p != spec.extra_attribs.end(); ++p) {
629 // For this param, see if there's a table entry with a matching
630 // name, where the xmp name is in the right category.
631 const XMPtag* tag = xmp_tagmap_ref().find(p->name());
632 if (tag) {
633 if (!Strutil::iequals(p->name(), tag->oiioname))
634 continue; // Name doesn't match
635 if (tag->special & Suppress) {
636 break; // Purposely suppressing
637 }
638 std::string s = stringize(p, *tag);
639 if (s.size()) {
640 list.emplace_back(tag, s);
641 //std::cerr << " " << tag->xmpname << " = " << s << "\n";
642 }
643 }
644 }
645 }
646
647
648
// How encode_xmp_category() should write out the items of a category.
enum XmpControl {
    XMP_suppress = 0,  // write nothing
    XMP_nodes    = 1,  // one <name>value</name> element per item
    XMP_attribs  = 2,  // name="value" attributes of the rdf:Description
    XMP_SeqList  = 3,  // sequential list (wrapped in rdf:Seq)
    XMP_BagList  = 4,  // unordered list (wrapped in rdf:Bag)
    XMP_AltList  = 5   // alternate list (wrapped in rdf:Alt)
};
657
658
659 // Turn an entire category of XMP items into a properly serialized
660 // xml fragment.
661 static std::string
encode_xmp_category(std::vector<std::pair<const XMPtag *,std::string>> & list,const char * xmlnamespace,const char * pattern,const char * exclude_pattern,const char * nodename,const char * url,bool minimal,XmpControl control)662 encode_xmp_category(std::vector<std::pair<const XMPtag*, std::string>>& list,
663 const char* xmlnamespace, const char* pattern,
664 const char* exclude_pattern, const char* nodename,
665 const char* url, bool minimal, XmpControl control)
666 {
667 std::string category = std::string(xmlnamespace) + ':';
668 std::string xmp;
669 std::string xmp_minimal;
670
671 #if DEBUG_XMP_WRITE
672 std::cerr << "Category " << xmlnamespace << ", pattern '" << pattern
673 << "'\n";
674 #endif
675 // Loop over all params...
676 bool found = false;
677 for (size_t li = 0; li < list.size(); ++li) {
678 // For this param, see if there's a table entry with a matching
679 // name, where the xmp name is in the right category.
680 const XMPtag* tag = list[li].first;
681 const std::string& val(list[li].second);
682 const char* xmpname(tag->xmpname);
683 if (control == XMP_attribs && (tag->special & (IsList | IsSeq)))
684 continue; // Skip lists for attrib output
685 if (exclude_pattern && exclude_pattern[0]
686 && Strutil::istarts_with(xmpname, exclude_pattern)) {
687 continue;
688 }
689 if (Strutil::istarts_with(xmpname, pattern)) {
690 std::string x;
691 if (control == XMP_attribs)
692 x = Strutil::sprintf("%s=\"%s\"", xmpname, val);
693 else if (control == XMP_AltList || control == XMP_BagList) {
694 std::vector<std::string> vals;
695 Strutil::split(val, vals, ";");
696 for (auto& val : vals) {
697 val = Strutil::strip(val);
698 x += Strutil::sprintf("<rdf:li>%s</rdf:li>", val);
699 }
700 } else
701 x = Strutil::sprintf("<%s>%s</%s>", xmpname, val, xmpname);
702 if (!x.empty() && control != XMP_suppress) {
703 if (!found) {
704 // if (nodename && nodename[0]) {
705 // x = Strutil::sprintf("<%s ", nodename);
706 // }
707 }
708 if (minimal
709 && (tag->special & (TiffRedundant | ExifRedundant))) {
710 if (xmp_minimal.size())
711 xmp_minimal += ' ';
712 xmp_minimal += x;
713 } else {
714 if (xmp.size())
715 xmp += ' ';
716 xmp += x;
717 }
718 found = true;
719 #if DEBUG_XMP_WRITE
720 std::cerr << " going to output '" << x << "'\n";
721 #endif
722 }
723 #if DEBUG_XMP_WRITE
724 else
725 std::cerr << " NOT going to output '" << x << "'\n";
726 #endif
727 list.erase(list.begin() + li);
728 --li;
729 }
730 }
731
732 if (xmp.length() && xmp_minimal.length())
733 xmp += ' ' + xmp_minimal;
734
735 #if 1
736 if (xmp.length()) {
737 if (control == XMP_BagList)
738 xmp = Strutil::sprintf("<%s><rdf:Bag> %s </rdf:Bag></%s>",
739 nodename ? nodename : xmlnamespace, xmp,
740 nodename ? nodename : xmlnamespace);
741 else if (control == XMP_SeqList)
742 xmp = Strutil::sprintf("<%s><rdf:Seq> %s </rdf:Seq></%s>",
743 nodename ? nodename : xmlnamespace, xmp,
744 nodename ? nodename : xmlnamespace);
745 else if (control == XMP_AltList)
746 xmp = Strutil::sprintf("<%s><rdf:Alt> %s </rdf:Alt></%s>",
747 nodename ? nodename : xmlnamespace, xmp,
748 nodename ? nodename : xmlnamespace);
749 # if 0
750 else if (control == XMP_nodes)
751 xmp = Strutil::sprintf("<%s>%s</%s>",
752 nodename ? nodename : xmlnamespace, xmp,
753 nodename ? nodename : xmlnamespace);
754 # endif
755
756 std::string r;
757 r += Strutil::sprintf("<rdf:Description rdf:about=\"\" "
758 "xmlns:%s=\"%s\"%s",
759 xmlnamespace, url,
760 (control == XMP_attribs) ? " " : ">");
761 r += xmp;
762 if (control == XMP_attribs)
763 r += "/> "; // end the <rdf:Description...
764 else
765 r += " </rdf:Description>";
766 return r;
767 }
768 #endif
769
770 #if DEBUG_XMP_WRITE
771 std::cerr << " Nothing to output\n";
772 #endif
773 return std::string();
774 }
775
776
777
778 std::string
encode_xmp(const ImageSpec & spec,bool minimal)779 encode_xmp(const ImageSpec& spec, bool minimal)
780 {
781 std::vector<std::pair<const XMPtag*, std::string>> list;
782 gather_xmp_attribs(spec, list);
783
784 std::string xmp;
785
786 #if 1
787 // This stuff seems to work
788 xmp += encode_xmp_category(list, "photoshop", "photoshop:", NULL, NULL,
789 "http://ns.adobe.com/photoshop/1.0/", minimal,
790 XMP_attribs);
791 xmp += encode_xmp_category(list, "xmp", "xmp:Rating", NULL, NULL,
792 "http://ns.adobe.com/xap/1.0/", minimal,
793 XMP_attribs);
794 xmp += encode_xmp_category(list, "xmp", "xmp:CreateDate", NULL, NULL,
795 "http://ns.adobe.com/xap/1.0/", false,
796 XMP_attribs);
797 xmp += encode_xmp_category(list, "xmp", "xmp:ModifyDate", NULL, NULL,
798 "http://ns.adobe.com/xap/1.0/", false,
799 XMP_attribs);
800 xmp += encode_xmp_category(list, "xmp", "xmp:MetadataDate", NULL, NULL,
801 "http://ns.adobe.com/xap/1.0/", false,
802 XMP_attribs);
803 xmp += encode_xmp_category(list, "xmpRights", "xmpRights:UsageTerms", NULL,
804 "xmpRights:UsageTerms",
805 "http://ns.adobe.com/xap/1.0/rights/", minimal,
806 XMP_AltList);
807 xmp += encode_xmp_category(list, "xmpRights", "xmpRights:", NULL, NULL,
808 "http://ns.adobe.com/xap/1.0/rights/", minimal,
809 XMP_attribs);
810 xmp += encode_xmp_category(list, "dc", "dc:subject", NULL, "dc:subject",
811 "http://purl.org/dc/elements/1.1/", minimal,
812 XMP_BagList);
813 xmp += encode_xmp_category(list, "Iptc4xmpCore", "Iptc4xmpCore:SubjectCode",
814 NULL, "Iptc4xmpCore:SubjectCode",
815 "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/",
816 false, XMP_BagList);
817 xmp += encode_xmp_category(list, "Iptc4xmpCore",
818 "Iptc4xmpCore:", "Iptc4xmpCore:Ci", NULL,
819 "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/",
820 minimal, XMP_attribs);
821 xmp += encode_xmp_category(list, "Iptc4xmpCore", "Iptc4xmpCore:Ci", NULL,
822 "Iptc4xmpCore:CreatorContactInfo",
823 "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/",
824 minimal, XMP_attribs);
825 xmp += encode_xmp_category(list, "Iptc4xmpCore", "Iptc4xmpCore:Scene", NULL,
826 "Iptc4xmpCore:Scene",
827 "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/",
828 minimal, XMP_BagList);
829
830 xmp += encode_xmp_category(list, "xmpMM", "xmpMM:", NULL, NULL,
831 "http://ns.adobe.com/xap/1.0/mm/", minimal,
832 XMP_attribs);
833 xmp += encode_xmp_category(list, "GPano", "GPano:", NULL, NULL,
834 "http://ns.google.com/photos/1.0/panorama/",
835 minimal, XMP_attribs);
836 xmp += encode_xmp_category(list, "crs", "crs:", NULL, NULL,
837 "http://ns.adobe.com/camera-raw-settings/1.0/",
838 minimal, XMP_attribs);
839 #endif
840
841 xmp += encode_xmp_category(list, "xmp", "xmp:", NULL, NULL,
842 "http://ns.adobe.com/xap/1.0/", minimal,
843 XMP_nodes);
844
845 xmp += encode_xmp_category(list, "tiff", "tiff:", NULL, NULL,
846 "http://ns.adobe.com/tiff/1.0/", minimal,
847 XMP_attribs);
848 #if 0
849 // Doesn't work yet
850 xmp += encode_xmp_category (list, "xapRights", "xapRights:", NULL, NULL,
851 "http://ns.adobe.com/xap/1.0/rights/", minimal, XMP_attribs);
852 // xmp += encode_xmp_category (list, "dc", "dc:", NULL, NULL,
853 // "http://purl.org/dc/elements/1.1/", minimal, XMP_attribs);
854
855 #endif
856
857 // FIXME exif xmp stRef stVer stJob xmpDM
858
859 if (!xmp.empty()) {
860 std::string head(
861 "<?xpacket begin=\"\xEF\xBB\xBF\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?> "
862 "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"Adobe XMP Core 5.5-c002 1.148022, 2012/07/15-18:06:45 \"> <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"> ");
863 std::string foot(" </rdf:RDF> </x:xmpmeta> <?xpacket end=\"w\"?>");
864 xmp = head + xmp + foot;
865 }
866
867
868 #if DEBUG_XMP_WRITE
869 std::cerr << "xmp to write = \n---\n" << xmp << "\n---\n";
870 std::cerr << "\n\nHere's what I still haven't output:\n";
871 for (size_t i = 0; i < list.size(); ++i)
872 std::cerr << list[i].first->xmpname << "\n";
873 #endif
874
875 return xmp;
876 }
877
878
879 OIIO_NAMESPACE_END
880