1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4 
5 
6 #include <iostream>
7 
8 #include <boost/container/flat_map.hpp>
9 
10 #include <OpenImageIO/fmath.h>
11 #include <OpenImageIO/imageio.h>
12 #include <OpenImageIO/strutil.h>
13 #include <OpenImageIO/tiffutils.h>
14 #include <OpenImageIO/timer.h>
15 
16 extern "C" {
17 #include "tiff.h"
18 }
19 
20 #if USE_EXTERNAL_PUGIXML
21 #    include <pugixml.hpp>
22 #else
23 #    include <OpenImageIO/detail/pugixml/pugixml.hpp>
24 #endif
25 
// Compile-time tracing switches.  Setting DEBUG_XMP_READ to 1 enables the
// std::cerr diagnostics in the XMP decoding code; setting DEBUG_XMP_WRITE
// to 1 enables the ones in the XMP encoding code.
#define DEBUG_XMP_READ 0
#define DEBUG_XMP_WRITE 0

// NOTE(review): presumably the character encoding declared when XMP is
// serialized; it is not referenced in the portion of the file reviewed
// here -- confirm against its users.
#define MY_ENCODING "ISO-8859-1"
30 
31 OIIO_NAMESPACE_BEGIN
32 
33 namespace {  // anonymous
34 
35 
// Special-handling flags for entries of the XMP tag table.  Every flag
// occupies its own bit, so several of them may be combined with '|' and
// tested with '&'.
enum XMPspecial {
    NothingSpecial = 0,
    Rational       = 1 << 0,  // Value must be written in the form A/B
    DateConversion = 1 << 1,  // A date; may need conversion to canonical form
    TiffRedundant  = 1 << 2,  // Already covered by ordinary TIFF tags
    ExifRedundant  = 1 << 3,  // Already covered by Exif
    Suppress       = 1 << 4,  // Never emit this item as XMP
    IsList         = 1 << 5,  // Holds a semicolon-separated list
    IsSeq          = 1 << 6,  // Like IsList, but the order is significant
    IsBool         = 1 << 7   // Written out as True/False
};
49 
50 struct XMPtag {
51     const char* xmpname;   // XMP name
52     const char* oiioname;  // Attribute name we use
53     TypeDesc oiiotype;     // Type we use
54     int special;           // Special handling
55 
XMPtag__anon1103262b0111::XMPtag56     XMPtag(const char* xname, const char* oname, TypeDesc type = TypeUnknown,
57            int spec = 0)
58         : xmpname(xname)
59         , oiioname(oname)
60         , oiiotype(type)
61         , special(spec)
62     {
63     }
64 };
65 
// Translation table between XMP item names and OIIO attribute names.
// Each row is { xmpname, oiioname, oiiotype, special } (see XMPtag), where
// `special` is a bitwise-or of XMPspecial flags.
//  * A row whose oiioname is empty marks an XMP item that is deliberately
//    ignored when reading (add_attrib returns without storing anything).
//  * The table must end with the { nullptr, nullptr } sentinel: XMPtagMap
//    walks it until it meets a null xmpname.
//  * Lookups through XMPtagMap are keyed on the lower-cased xmpname, so
//    the case used here only matters for what gets written out.
static XMPtag xmptag[] = {
    // clang-format off
    { "photoshop:AuthorsPosition", "IPTC:AuthorsPosition", TypeDesc::STRING, 0 },
    { "photoshop:CaptionWriter", "IPTC:CaptionWriter", TypeDesc::STRING, 0 },
    { "photoshop:Category", "IPTC:Category", TypeDesc::STRING, 0 },
    { "photoshop:City", "IPTC:City", TypeDesc::STRING, 0 },
    { "photoshop:Country", "IPTC:Country", TypeDesc::STRING, 0 },
    { "photoshop:Credit", "IPTC:Provider", TypeDesc::STRING, 0 },
    { "photoshop:DateCreated", "DateTime", TypeDesc::STRING, DateConversion|TiffRedundant },
    { "photoshop:Headline", "IPTC:Headline", TypeDesc::STRING, 0 },
    { "photoshop:History", "ImageHistory", TypeDesc::STRING, 0 },
    { "photoshop:Instructions", "IPTC:Instructions", TypeDesc::STRING, 0 },
    { "photoshop:Source", "IPTC:Source", TypeDesc::STRING, 0 },
    { "photoshop:State", "IPTC:State", TypeDesc::STRING, 0 },
    { "photoshop:SupplementalCategories", "IPTC:SupplementalCategories", TypeDesc::STRING, IsList|Suppress },  // FIXME -- un-suppress when we have it working
    { "photoshop:TransmissionReference", "IPTC:TransmissionReference", TypeDesc::STRING, 0 },
    { "photoshop:Urgency", "photoshop:Urgency", TypeDesc::INT, 0 },

    { "tiff:Compression", "tiff:Compression", TypeDesc::INT, TiffRedundant },
    { "tiff:PlanarConfiguration", "tiff:PlanarConfiguration", TypeDesc::INT, TiffRedundant },
    { "tiff:PhotometricInterpretation", "tiff:PhotometricInterpretation", TypeDesc::INT, TiffRedundant },
    { "tiff:subfiletype", "tiff:subfiletype", TypeDesc::INT, TiffRedundant },
    { "tiff:Orientation", "Orientation", TypeDesc::INT, TiffRedundant },
    { "tiff:XResolution", "XResolution", TypeDesc::FLOAT, Rational|TiffRedundant },
    { "tiff:YResolution", "YResolution", TypeDesc::FLOAT, Rational|TiffRedundant },
    { "tiff:ResolutionUnit", "ResolutionUnit", TypeDesc::INT, TiffRedundant },
    { "tiff:Artist", "Artist", TypeDesc::STRING, 0 },
    { "tiff:Copyright", "Copyright", TypeDesc::STRING, 0 },
    { "tiff:DateTime", "DateTime", TypeDesc::STRING, DateConversion },
    { "tiff:ImageDescription", "ImageDescription", TypeDesc::STRING, 0 },
    { "tiff:Make", "Make", TypeDesc::STRING, 0 },
    { "tiff:Model", "Model", TypeDesc::STRING, 0 },
    { "tiff:Software", "Software", TypeDesc::STRING, TiffRedundant },

    { "exif:ColorSpace", "Exif:ColorSpace", TypeDesc::INT, ExifRedundant },
    { "exif:PixelXDimension", "", TypeDesc::INT, ExifRedundant|TiffRedundant},
    { "exif:PixelYDimension", "", TypeDesc::INT, ExifRedundant|TiffRedundant },
    { "exifEX:PhotographicSensitivity", "Exif:ISOSpeedRatings", TypeDesc::INT, ExifRedundant },

    { "xmp:CreateDate", "DateTime", TypeDesc::STRING, DateConversion|TiffRedundant },
    { "xmp:CreatorTool", "Software", TypeDesc::STRING, TiffRedundant },
    { "xmp:Label", "IPTC:Label", TypeDesc::STRING, 0 },
    { "xmp:MetadataDate", "IPTC:MetadataDate", TypeDesc::STRING, DateConversion },
    { "xmp:ModifyDate", "IPTC:ModifyDate", TypeDesc::STRING, DateConversion },
    { "xmp:Rating", "IPTC:Rating", TypeDesc::INT, 0 },

    { "xmpMM:DocumentID", "IPTC:DocumentID", TypeDesc::STRING, 0 },
    { "xmpMM:History", "ImageHistory", TypeDesc::STRING, IsSeq|Suppress },
    { "xmpMM:InstanceID", "IPTC:InstanceID", TypeDesc::STRING, 0 },
    { "xmpMM:OriginalDocumentID", "IPTC:OriginalDocumentID", TypeDesc::STRING, 0 },

    { "xmpRights:Marked", "IPTC:CopyrightStatus", TypeDesc::INT, IsBool },
    { "xmpRights:WebStatement", "IPTC:CopyrightInfoURL", TypeDesc::STRING, 0 },
    { "xmpRights:UsageTerms", "IPTC:RightsUsageTerms", TypeDesc::STRING, 0 },

    { "dc:format", "", TypeDesc::STRING, TiffRedundant|Suppress },
    { "dc:Description", "ImageDescription", TypeDesc::STRING, TiffRedundant },
    { "dc:Creator", "Artist", TypeDesc::STRING, TiffRedundant },
    { "dc:Rights", "Copyright", TypeDesc::STRING, TiffRedundant },
    { "dc:title", "IPTC:ObjectName", TypeDesc::STRING, 0 },
    { "dc:subject", "Keywords", TypeDesc::STRING, IsList },
    { "dc:keywords", "Keywords", TypeDesc::STRING, IsList },

    { "Iptc4xmpCore:IntellectualGenre", "IPTC:IntellectualGenre", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CountryCode", "IPTC:CountryCode", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CreatorContactInfo", "IPTC:CreatorContactInfo", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:ContactInfoDetails", "IPTC:Contact", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrExtadr", "IPTC:ContactInfoAddress", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrCity", "IPTC:ContactInfoCity", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrRegion", "IPTC:ContactInfoState", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrPcode", "IPTC:ContactInfoPostalCode", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiAdrCtry", "IPTC:ContactInfoCountry", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiEmailWork", "IPTC:ContactInfoEmail", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiTelWork", "IPTC:ContactInfoPhone", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:CiUrlWork", "IPTC:ContactInfoURL", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:Location", "IPTC:Sublocation", TypeDesc::STRING, 0 },
    { "Iptc4xmpCore:SubjectCode", "IPTC:SubjectCode", TypeDesc::STRING, IsList },
    { "Iptc4xmpCore:Scene", "IPTC:SceneCode", TypeDesc::STRING, IsList },
    { "Iptc4xmpExt:PersonInImage", "IPTC:PersonInImage", TypeDesc::STRING, IsList },

    { "aux:Firmware", "aux:Firmware", TypeDesc::STRING, 0},

    { "crs:AutoBrightness", "crs:AutoBrightness"  , TypeDesc::INT, IsBool },
    { "crs:AutoContrast", "crs:AutoContrast"    , TypeDesc::INT, IsBool },
    { "crs:AutoExposure", "crs:AutoExposure"    , TypeDesc::INT, IsBool },
    { "crs:AutoShadows", "crs:AutoShadows"     , TypeDesc::INT, IsBool },
    { "crs:BlueHue", "crs:BlueHue"         , TypeDesc::INT, 0 },
    { "crs:BlueSaturation", "crs:BlueSaturation"  , TypeDesc::INT, 0 },
    { "crs:Brightness", "crs:Brightness"      , TypeDesc::INT, 0 },
    { "crs:CameraProfile", "crs:CameraProfile"   , TypeDesc::STRING, 0 },
    { "crs:ChromaticAberrationB", "crs:ChromaticAberrationB"    , TypeDesc::INT, 0 },
    { "crs:ChromaticAberrationR", "crs:ChromaticAberrationR"    , TypeDesc::INT, 0 },
    { "crs:ColorNoiseReduction", "crs:ColorNoiseReduction" , TypeDesc::INT, 0 },
    { "crs:Contrast", "crs:Contrast", TypeDesc::INT, 0 },
    { "crs:CropTop", "crs:CropTop", TypeDesc::FLOAT, 0 },
    { "crs:CropLeft", "crs:CropLeft", TypeDesc::FLOAT, 0 },
    { "crs:CropBottom", "crs:CropBottom", TypeDesc::FLOAT, 0 },
    { "crs:CropRight", "crs:CropRight", TypeDesc::FLOAT, 0 },
    { "crs:CropAngle", "crs:CropAngle", TypeDesc::FLOAT, 0 },
    { "crs:CropWidth", "crs:CropWidth", TypeDesc::FLOAT, 0 },
    { "crs:CropHeight", "crs:CropHeight", TypeDesc::FLOAT, 0 },
    { "crs:CropUnits", "crs:CropUnits", TypeDesc::INT, 0 },
    { "crs:Exposure", "crs:Exposure", TypeDesc::FLOAT, 0 },
    { "crs:GreenHue", "crs:GreenHue", TypeDesc::INT, 0 },
    { "crs:GreenSaturation", "crs:GreenSaturation", TypeDesc::INT, 0 },
    { "crs:HasCrop", "crs:HasCrop", TypeDesc::INT, IsBool },
    { "crs:HasSettings", "crs:HasSettings", TypeDesc::INT, IsBool },
    { "crs:LuminanceSmoothing", "crs:LuminanceSmoothing", TypeDesc::INT, 0 },
    { "crs:RawFileName", "crs:RawFileName", TypeDesc::STRING, 0 },
    { "crs:RedHue", "crs:RedHue", TypeDesc::INT, 0 },
    { "crs:RedSaturation", "crs:RedSaturation", TypeDesc::INT, 0 },
    { "crs:Saturation", "crs:Saturation", TypeDesc::INT, 0 },
    { "crs:Shadows", "crs:Shadows", TypeDesc::INT, 0 },
    { "crs:ShadowTint", "crs:ShadowTint", TypeDesc::INT, 0 },
    { "crs:Sharpness", "crs:Sharpness", TypeDesc::INT, 0 },
    { "crs:Temperature", "crs:Temperature", TypeDesc::INT, 0 },
    { "crs:Tint", "crs:Tint", TypeDesc::INT, 0 },
    { "crs:ToneCurve", "crs:ToneCurve", TypeDesc::STRING, 0 },
    { "crs:ToneCurveName", "crs:ToneCurveName", TypeDesc::STRING, 0 },
    { "crs:Version", "crs:Version", TypeDesc::STRING, 0 },
    { "crs:VignetteAmount", "crs:VignetteAmount", TypeDesc::INT, 0 },
    { "crs:VignetteMidpoint", "crs:VignetteMidpoint", TypeDesc::INT, 0 },
    { "crs:WhiteBalance", "crs:WhiteBalance", TypeDesc::STRING, 0 },

    { "GPano:UsePanoramaViewer", "GPano:UsePanoramaViewer", TypeDesc::INT, IsBool },
    { "GPano:CaptureSoftware", "GPano:CaptureSoftware", TypeDesc::STRING, 0 },
    { "GPano:StitchingSoftware", "GPano:StitchingSoftware", TypeDesc::STRING, 0 },
    { "GPano:ProjectionType", "GPano:ProjectionType", TypeDesc::STRING, 0 },
    { "GPano:PoseHeadingDegrees", "GPano:PoseHeadingDegrees", TypeDesc::FLOAT, 0 },
    { "GPano:PosePitchDegrees", "GPano:PosePitchDegrees", TypeDesc::FLOAT, 0 },
    { "GPano:PoseRollDegrees", "GPano:PoseRollDegrees", TypeDesc::FLOAT, 0 },
    { "GPano:InitialViewHeadingDegrees", "GPano:InitialViewHeadingDegrees", TypeDesc::INT, 0 },
    { "GPano:InitialViewPitchDegrees", "GPano:InitialViewPitchDegrees", TypeDesc::INT, 0 },
    { "GPano:InitialViewRollDegrees", "GPano:InitialViewRollDegrees", TypeDesc::INT, 0 },
    { "GPano:InitialHorizontalFOVDegrees", "GPano:InitialHorizontalFOVDegrees", TypeDesc::FLOAT, 0 },
    { "GPano:FirstPhotoDate", "GPano:FirstPhotoDate", TypeDesc::STRING, DateConversion },
    { "GPano:LastPhotoDate", "GPano:LastPhotoDate", TypeDesc::STRING, DateConversion },
    { "GPano:SourcePhotosCount", "GPano:SourcePhotosCount", TypeDesc::INT, 0 },
    { "GPano:ExposureLockUsed", "GPano:ExposureLockUsed", TypeDesc::INT, IsBool },
    { "GPano:CroppedAreaImageWidthPixels", "GPano:CroppedAreaImageWidthPixels", TypeDesc::INT, 0 },
    { "GPano:CroppedAreaImageHeightPixels", "GPano:CroppedAreaImageHeightPixels", TypeDesc::INT, 0 },
    { "GPano:FullPanoWidthPixels", "GPano:FullPanoWidthPixels", TypeDesc::INT, 0 },
    { "GPano:FullPanoHeightPixels", "GPano:FullPanoHeightPixels", TypeDesc::INT, 0 },
    { "GPano:CroppedAreaLeftPixels", "GPano:CroppedAreaLeftPixels", TypeDesc::INT, 0 },
    { "GPano:CroppedAreaTopPixels", "GPano:CroppedAreaTopPixels", TypeDesc::INT, 0 },
    { "GPano:InitialCameraDolly", "GPano:InitialCameraDolly", TypeDesc::FLOAT, 0 },
    { "GPano:LargestValidInteriorRectWidth", "GPano:LargestValidInteriorRectWidth", TypeDesc::INT, 0 },
    { "GPano:LargestValidInteriorRectHeight", "GPano:LargestValidInteriorRectHeight", TypeDesc::INT, 0 },
    { "GPano:LargestValidInteriorRectTop", "GPano:LargestValidInteriorRectTop", TypeDesc::INT, 0 },
    { "GPano:LargestValidInteriorRectLeft", "GPano:LargestValidInteriorRectLeft", TypeDesc::INT, 0 },

    { "rdf:li", "" },  // ignore these strays
    { nullptr, nullptr }
    // clang-format on
};
221 
222 
223 
224 class XMPtagMap {
225     typedef boost::container::flat_map<std::string, const XMPtag*> tagmap_t;
226     // Key is lower case so it's effectively case-insensitive
227 public:
XMPtagMap(const XMPtag * tag_table)228     XMPtagMap(const XMPtag* tag_table)
229     {
230         for (const XMPtag* t = &tag_table[0]; t->xmpname; ++t) {
231             std::string lower(t->xmpname);
232             Strutil::to_lower(lower);
233             m_tagmap[lower] = t;
234         }
235     }
236 
find(string_view name) const237     const XMPtag* find(string_view name) const
238     {
239         std::string lower = name;
240         Strutil::to_lower(lower);
241         tagmap_t::const_iterator i = m_tagmap.find(lower);
242         return i == m_tagmap.end() ? nullptr : i->second;
243     }
244 
245 private:
246     tagmap_t m_tagmap;
247 };
248 
249 static XMPtagMap&
xmp_tagmap_ref()250 xmp_tagmap_ref()
251 {
252     static XMPtagMap T(xmptag);
253     return T;
254 }
255 
256 
257 
258 // Does it look like the string representation of a rational value?
259 inline bool
string_is_rational(string_view s)260 string_is_rational(string_view s)
261 {
262     int n;
263     return Strutil::parse_int(s, n) && Strutil::parse_char(s, '/')
264            && Strutil::string_is_int(s);
265 }
266 
267 
268 
269 inline bool
parse_rational(string_view s,int & n,int & d)270 parse_rational(string_view s, int& n, int& d)
271 {
272     return Strutil::parse_int(s, n) && Strutil::parse_char(s, '/')
273            && Strutil::parse_int(s, d);
274 }
275 
276 
277 
278 // Utility: add an attribute to the spec with the given xml name and
279 // value.  Search for it in xmptag, and if found that will tell us what
280 // the type is supposed to be, as well as any special handling.  If not
281 // found in the table, add it as a string and hope for the best.
282 // Return value is the size of the resulting attribute (can be used to
283 // catch runaway or corrupt XML).
284 static size_t
add_attrib(ImageSpec & spec,string_view xmlname,string_view xmlvalue,bool attribIsSeq=false)285 add_attrib(ImageSpec& spec, string_view xmlname, string_view xmlvalue,
286            bool attribIsSeq = false)
287 {
288 #if DEBUG_XMP_READ
289     std::cerr << "add_attrib " << xmlname << ": '" << xmlvalue << "'\n";
290 #endif
291     std::string oiioname = xmlname;
292     TypeDesc oiiotype;
293     int special = NothingSpecial;
294 
295     // See if it's in the xmp table, which will tell us something about the
296     // proper type (everything in the xml itself just looks like a string).
297     if (const XMPtag* xt = xmp_tagmap_ref().find(xmlname)) {
298         if (!xt->oiioname || !xt->oiioname[0])
299             return 0;  // ignore it purposefully
300         // Found
301         oiioname = xt->oiioname;
302         oiiotype = xt->oiiotype;
303         special  = xt->special;
304     }
305 
306     // Also try looking it up to see if it's a known exif tag.
307     int tag = -1, tifftype = -1, count = 0;
308     if (Strutil::istarts_with(xmlname, "Exif:")
309         && (exif_tag_lookup(xmlname, tag, tifftype, count)
310             || exif_tag_lookup(xmlname.substr(5), tag, tifftype, count))) {
311         // It's a known Exif name
312         if (tifftype == TIFF_SHORT && count == 1)
313             oiiotype = TypeDesc::UINT;
314         else if (tifftype == TIFF_LONG && count == 1)
315             oiiotype = TypeDesc::UINT;
316         else if ((tifftype == TIFF_RATIONAL || tifftype == TIFF_SRATIONAL)
317                  && count == 1) {
318             oiiotype = TypeDesc::FLOAT;
319             special  = Rational;
320         } else if (tifftype == TIFF_ASCII)
321             oiiotype = TypeDesc::STRING;
322         else if (tifftype == TIFF_BYTE && count == 1)
323             oiiotype = TypeDesc::INT;
324         else if (tifftype == TIFF_NOTYPE)
325             return 0;  // skip
326     }
327 
328     // Guess the type if unknown
329     if (oiiotype == TypeUnknown) {
330         if (Strutil::string_is_int(xmlvalue))
331             oiiotype = TypeInt;
332         else if (Strutil::string_is_float(xmlvalue))
333             oiiotype = TypeFloat;
334         else
335             oiiotype = TypeString;
336         if (attribIsSeq)
337             special |= IsSeq;
338     }
339 
340     if (oiiotype == TypeDesc::STRING) {
341         std::string val;
342         if (special & (IsList | IsSeq)) {
343             // Special case -- append it to a list
344             std::vector<string_view> items;
345             ParamValue* p = spec.find_attribute(oiioname, TypeDesc::STRING);
346             bool dup      = false;
347             if (p) {
348                 items = Strutil::splitsv(*(const char**)p->data(), ";");
349                 for (auto& item : items) {
350                     item = Strutil::strip(item);
351                     dup |= (item == xmlvalue);
352                 }
353                 dup |= (xmlvalue == (*(const char**)p->data()));
354             }
355             if (!dup)
356                 items.emplace_back(xmlvalue);
357             val = Strutil::join(items, "; ");
358         } else {
359             val = xmlvalue;
360         }
361         spec.attribute(oiioname, val);
362         return val.size();
363     } else if (oiiotype == TypeRational || string_is_rational(xmlname)) {
364         int val[2];
365         if (parse_rational(xmlvalue, val[0], val[1]))
366             spec.attribute(xmlname, TypeRational, &val[0]);
367         return sizeof(val);
368     } else if (oiiotype == TypeDesc::INT) {
369         std::vector<int> vals;
370         if ((special & (IsList | IsSeq))
371             && spec.extra_attribs.contains(xmlname))
372             vals = spec.extra_attribs[xmlname].as_vec<int>();
373         if (special & IsBool)
374             vals.push_back((int)Strutil::iequals(xmlvalue, "true"));
375         else  // ordinary int
376             vals.push_back(Strutil::stoi(xmlvalue));
377         TypeDesc t = oiiotype;
378         if (vals.size() > 1)
379             t.arraylen = vals.size();
380         spec.attribute(oiioname, t, vals.data());
381         return vals.size() * sizeof(int);
382     } else if (oiiotype == TypeDesc::UINT) {
383         spec.attribute(oiioname, Strutil::from_string<unsigned int>(xmlvalue));
384         return sizeof(unsigned int);
385     } else if (oiiotype == TypeDesc::FLOAT) {
386         std::vector<float> vals;
387         if ((special & (IsList | IsSeq))
388             && spec.extra_attribs.contains(xmlname))
389             vals = spec.extra_attribs[xmlname].as_vec<float>();
390         vals.push_back(Strutil::stof(xmlvalue));
391         TypeDesc t = oiiotype;
392         if (vals.size() > 1)
393             t.arraylen = vals.size();
394         spec.attribute(oiioname, t, vals.data());
395         return vals.size() * sizeof(float);
396     }
397 #if (!defined(NDEBUG) || DEBUG_XMP_READ)
398     else {
399         std::cerr << "iptc xml add_attrib unknown type " << xmlname << ' '
400                   << oiiotype.c_str() << "\n";
401     }
402 #endif
403 
404     // Catch-all for unrecognized things -- just add them as a string!
405     spec.attribute(xmlname, xmlvalue);
406     return xmlvalue.size();
407 }
408 
409 
410 
// Utility: starting at position pos, locate in str the first occurrence
// of startmarker, and then the first occurrence of endmarker at or after
// it.  When both are found, return true with startpos set to the index
// where startmarker begins and endpos set to the index just past the end
// of endmarker.  Otherwise return false: if startmarker is missing,
// startpos is npos and endpos is left untouched; if only endmarker is
// missing, endpos is npos.
static bool
extract_middle(string_view str, size_t pos, string_view startmarker,
               string_view endmarker, size_t& startpos, size_t& endpos)
{
    startpos = str.find(startmarker, pos);
    if (startpos != std::string::npos) {
        endpos = str.find(endmarker, startpos);
        if (endpos != std::string::npos) {
            // Report the position just past the end marker.
            endpos += endmarker.size();
            return true;
        }
    }
    return false;  // one of the markers is missing
}
428 
429 
430 // Decode one XMP node and its children.
431 // Return value is the size of the resulting attribute (can be used to
432 // catch runaway or corrupt XML).
433 static size_t
decode_xmp_node(pugi::xml_node node,ImageSpec & spec,int level=1,const char * parentname=NULL,bool isList=false)434 decode_xmp_node(pugi::xml_node node, ImageSpec& spec, int level = 1,
435                 const char* parentname = NULL, bool isList = false)
436 {
437     std::string mylist;  // will accumulate for list items
438     size_t totalsize = 0;
439     for (; node; node = node.next_sibling()) {
440 #if DEBUG_XMP_READ
441         std::cerr << "Level " << level << " " << node.name() << " = "
442                   << node.value() << "\n";
443 #endif
444         // First, encode all attributes of this node
445         for (pugi::xml_attribute attr = node.first_attribute(); attr;
446              attr                     = attr.next_attribute()) {
447 #if DEBUG_XMP_READ
448             std::cerr << "   level " << level << " parent "
449                       << (parentname ? parentname : "-") << " attr "
450                       << attr.name() << ' ' << attr.value() << "\n";
451 #endif
452             if (Strutil::istarts_with(attr.name(), "xml:")
453                 || Strutil::istarts_with(attr.name(), "xmlns:"))
454                 continue;  // xml attributes aren't image metadata
455             if (attr.name()[0] && attr.value()[0]) {
456                 auto sz = add_attrib(spec, attr.name(), attr.value(), isList);
457                 totalsize += sz;
458                 // As a guard against runaway lists or corrupt XMP blocks,
459                 // don't let attribute lists grow to more than 64KB each.
460                 if (sz > 64 * 1024)
461                     break;
462             }
463         }
464         if (Strutil::iequals(node.name(), "xmpMM::History")) {
465             // FIXME -- image history is complicated. Come back to it.
466             continue;
467         }
468         if (Strutil::iequals(node.name(), "photoshop:DocumentAncestors")) {
469             // This tag is nothing but trouble. Some images can have
470             // literally MBs in them, placed there by Photoshop as the
471             // result of certain cut-and-paste operations, but serving no
472             // discernable purpose. Just skip it.  See also:
473             // https://prepression.blogspot.com/2017/06/metadata-bloat-photoshopdocumentancestors.html
474             // https://feedback.photoshop.com/conversations/photoshop/photoshop-corrupt-ancestors-tag-in-xmp-causing-giant-file-sizes/5f5f45f74b561a3d426ba97f
475             continue;
476         }
477         if (Strutil::iequals(node.name(), "rdf:Bag")
478             || Strutil::iequals(node.name(), "rdf:Seq")
479             || Strutil::iequals(node.name(), "rdf:Alt")
480             || Strutil::iequals(node.name(), "rdf:li")) {
481             // Various kinds of lists.  Recurse, pass the parent name
482             // down, and let the child know it's part of a list.
483             totalsize += decode_xmp_node(node.first_child(), spec, level + 1,
484                                          parentname, true);
485         } else {
486             // Not a list, but it's got children.  Recurse.
487             totalsize += decode_xmp_node(node.first_child(), spec, level + 1,
488                                          node.name(), isList);
489         }
490 
491         // If this node has a value but no name, it's definitely part
492         // of a list.  Accumulate the list items, separated by semicolons.
493         if (parentname && !node.name()[0] && node.value()[0]) {
494             totalsize -= mylist.size();
495             if (mylist.size())
496                 mylist += ";";
497             mylist += node.value();
498             totalsize += mylist.size();
499         }
500         // As a guard against runaway lists or corrupt XMP blocks,
501         // don't let attribute lists grow to more than 64KB each.
502         if (isList && totalsize > 64 * 1024)
503             break;
504     }
505 
506     // If we have accumulated a list, turn it into an attribute
507     if (parentname && mylist.size()) {
508         totalsize += add_attrib(spec, parentname, mylist, true);
509     }
510     return totalsize;
511 }
512 
513 
514 }  // anonymous namespace
515 
516 
517 
518 // DEPRECATED(2.1)
519 bool
decode_xmp(const std::string & xml,ImageSpec & spec)520 decode_xmp(const std::string& xml, ImageSpec& spec)
521 {
522     return decode_xmp(string_view(xml), spec);
523 }
524 
525 
526 
527 // DEPRECATED(2.1)
528 bool
decode_xmp(const char * xml,ImageSpec & spec)529 decode_xmp(const char* xml, ImageSpec& spec)
530 {
531     return decode_xmp(string_view(xml), spec);
532 }
533 
534 
535 
536 bool
decode_xmp(cspan<uint8_t> xml,ImageSpec & spec)537 decode_xmp(cspan<uint8_t> xml, ImageSpec& spec)
538 {
539     return decode_xmp(string_view((const char*)xml.data(), xml.size()), spec);
540 }
541 
542 
543 
544 bool
decode_xmp(string_view xml,ImageSpec & spec)545 decode_xmp(string_view xml, ImageSpec& spec)
546 {
547 #if DEBUG_XMP_READ
548     Timer timer;
549     std::cerr << "XMP size is " << xml.size() << "\n";
550     std::cerr << "XMP dump:\n---\n" << xml.substr(0, 4096) << "\n---\n";
551 #endif
552     if (!xml.length())
553         return true;
554     for (size_t startpos = 0, endpos = 0;
555          extract_middle(xml, endpos, "<rdf:Description", "</rdf:Description>",
556                         startpos, endpos);) {
557         // Turn that middle section into an XML document
558         string_view rdf = xml.substr(startpos, endpos - startpos);  // scooch in
559 #if DEBUG_XMP_READ
560         std::cerr << "RDF is:\n---\n" << rdf.substr(0, 4096) << "\n---\n";
561 #endif
562         pugi::xml_document doc;
563         pugi::xml_parse_result parse_result
564             = doc.load_buffer(rdf.data(), rdf.size(),
565                               pugi::parse_default | pugi::parse_fragment);
566         if (!parse_result) {
567 #if DEBUG_XMP_READ
568             std::cerr << "Error parsing XML @" << parse_result.offset << ": "
569                       << parse_result.description() << "\n";
570 #endif
571             // Instead of returning early here if there were errors parsing
572             // the XML -- I have noticed that very minor XML malformations
573             // are common in XMP found in files -- hope for the best and
574             // go ahead and assume that maybe it managed to put something
575             // useful in the resulting document.
576 #if 0
577             return true;
578 #endif
579         }
580         // Decode the contents of the XML document (it will recurse)
581         decode_xmp_node(doc.first_child(), spec);
582     }
583 #if DEBUG_XMP_READ
584     std::cerr << "XMP total parse time " << timer() << "\n";
585 #endif
586 
587     return true;
588 }
589 
590 
591 
592 // Turn one ParamValue (whose xmp info we know) into a properly
593 // serialized xmp string.
594 static std::string
stringize(const ParamValueList::const_iterator & p,const XMPtag & xmptag)595 stringize(const ParamValueList::const_iterator& p, const XMPtag& xmptag)
596 {
597     if (p->type() == TypeDesc::STRING) {
598         if (xmptag.special & DateConversion) {
599             // FIXME -- convert to yyyy-mm-ddThh:mm:ss.sTZD
600             // return std::string();
601         }
602         return std::string(*(const char**)p->data());
603     } else if (p->type() == TypeDesc::INT) {
604         if (xmptag.special & IsBool)
605             return *(const int*)p->data() ? "True" : "False";
606         else  // ordinary int
607             return Strutil::sprintf("%d", *(const int*)p->data());
608     } else if (p->type() == TypeDesc::FLOAT) {
609         if (xmptag.special & Rational) {
610             unsigned int num, den;
611             float_to_rational(*(const float*)p->data(), num, den);
612             return Strutil::sprintf("%d/%d", num, den);
613         } else {
614             return Strutil::sprintf("%g", *(const float*)p->data());
615         }
616     }
617     return std::string();
618 }
619 
620 
621 
622 static void
gather_xmp_attribs(const ImageSpec & spec,std::vector<std::pair<const XMPtag *,std::string>> & list)623 gather_xmp_attribs(const ImageSpec& spec,
624                    std::vector<std::pair<const XMPtag*, std::string>>& list)
625 {
626     // Loop over all params...
627     for (ParamValueList::const_iterator p = spec.extra_attribs.begin();
628          p != spec.extra_attribs.end(); ++p) {
629         // For this param, see if there's a table entry with a matching
630         // name, where the xmp name is in the right category.
631         const XMPtag* tag = xmp_tagmap_ref().find(p->name());
632         if (tag) {
633             if (!Strutil::iequals(p->name(), tag->oiioname))
634                 continue;  // Name doesn't match
635             if (tag->special & Suppress) {
636                 break;  // Purposely suppressing
637             }
638             std::string s = stringize(p, *tag);
639             if (s.size()) {
640                 list.emplace_back(tag, s);
641                 //std::cerr << "  " << tag->xmpname << " = " << s << "\n";
642             }
643         }
644     }
645 }
646 
647 
648 
// How encode_xmp_category should write out the items of a category.
enum XmpControl {
    XMP_suppress = 0,  // write nothing for this category
    XMP_nodes    = 1,  // write each item as <name>value</name>
    XMP_attribs  = 2,  // write each item as name="value"
    XMP_SeqList  = 3,  // sequential (ordered) list
    XMP_BagList  = 4,  // unordered list, one <rdf:li> per item
    XMP_AltList  = 5   // list of alternatives, one <rdf:li> per item
};
657 
658 
659 // Turn an entire category of XMP items into a properly serialized
660 // xml fragment.
661 static std::string
encode_xmp_category(std::vector<std::pair<const XMPtag *,std::string>> & list,const char * xmlnamespace,const char * pattern,const char * exclude_pattern,const char * nodename,const char * url,bool minimal,XmpControl control)662 encode_xmp_category(std::vector<std::pair<const XMPtag*, std::string>>& list,
663                     const char* xmlnamespace, const char* pattern,
664                     const char* exclude_pattern, const char* nodename,
665                     const char* url, bool minimal, XmpControl control)
666 {
667     std::string category = std::string(xmlnamespace) + ':';
668     std::string xmp;
669     std::string xmp_minimal;
670 
671 #if DEBUG_XMP_WRITE
672     std::cerr << "Category " << xmlnamespace << ", pattern '" << pattern
673               << "'\n";
674 #endif
675     // Loop over all params...
676     bool found = false;
677     for (size_t li = 0; li < list.size(); ++li) {
678         // For this param, see if there's a table entry with a matching
679         // name, where the xmp name is in the right category.
680         const XMPtag* tag = list[li].first;
681         const std::string& val(list[li].second);
682         const char* xmpname(tag->xmpname);
683         if (control == XMP_attribs && (tag->special & (IsList | IsSeq)))
684             continue;  // Skip lists for attrib output
685         if (exclude_pattern && exclude_pattern[0]
686             && Strutil::istarts_with(xmpname, exclude_pattern)) {
687             continue;
688         }
689         if (Strutil::istarts_with(xmpname, pattern)) {
690             std::string x;
691             if (control == XMP_attribs)
692                 x = Strutil::sprintf("%s=\"%s\"", xmpname, val);
693             else if (control == XMP_AltList || control == XMP_BagList) {
694                 std::vector<std::string> vals;
695                 Strutil::split(val, vals, ";");
696                 for (auto& val : vals) {
697                     val = Strutil::strip(val);
698                     x += Strutil::sprintf("<rdf:li>%s</rdf:li>", val);
699                 }
700             } else
701                 x = Strutil::sprintf("<%s>%s</%s>", xmpname, val, xmpname);
702             if (!x.empty() && control != XMP_suppress) {
703                 if (!found) {
704                     // if (nodename && nodename[0]) {
705                     //    x = Strutil::sprintf("<%s ", nodename);
706                     // }
707                 }
708                 if (minimal
709                     && (tag->special & (TiffRedundant | ExifRedundant))) {
710                     if (xmp_minimal.size())
711                         xmp_minimal += ' ';
712                     xmp_minimal += x;
713                 } else {
714                     if (xmp.size())
715                         xmp += ' ';
716                     xmp += x;
717                 }
718                 found = true;
719 #if DEBUG_XMP_WRITE
720                 std::cerr << "  going to output '" << x << "'\n";
721 #endif
722             }
723 #if DEBUG_XMP_WRITE
724             else
725                 std::cerr << "  NOT going to output '" << x << "'\n";
726 #endif
727             list.erase(list.begin() + li);
728             --li;
729         }
730     }
731 
732     if (xmp.length() && xmp_minimal.length())
733         xmp += ' ' + xmp_minimal;
734 
735 #if 1
736     if (xmp.length()) {
737         if (control == XMP_BagList)
738             xmp = Strutil::sprintf("<%s><rdf:Bag> %s </rdf:Bag></%s>",
739                                    nodename ? nodename : xmlnamespace, xmp,
740                                    nodename ? nodename : xmlnamespace);
741         else if (control == XMP_SeqList)
742             xmp = Strutil::sprintf("<%s><rdf:Seq> %s </rdf:Seq></%s>",
743                                    nodename ? nodename : xmlnamespace, xmp,
744                                    nodename ? nodename : xmlnamespace);
745         else if (control == XMP_AltList)
746             xmp = Strutil::sprintf("<%s><rdf:Alt> %s </rdf:Alt></%s>",
747                                    nodename ? nodename : xmlnamespace, xmp,
748                                    nodename ? nodename : xmlnamespace);
749 #    if 0
750         else if (control == XMP_nodes)
751             xmp = Strutil::sprintf("<%s>%s</%s>",
752                                    nodename ? nodename : xmlnamespace, xmp,
753                                    nodename ? nodename : xmlnamespace);
754 #    endif
755 
756         std::string r;
757         r += Strutil::sprintf("<rdf:Description rdf:about=\"\" "
758                               "xmlns:%s=\"%s\"%s",
759                               xmlnamespace, url,
760                               (control == XMP_attribs) ? " " : ">");
761         r += xmp;
762         if (control == XMP_attribs)
763             r += "/> ";  // end the <rdf:Description...
764         else
765             r += " </rdf:Description>";
766         return r;
767     }
768 #endif
769 
770 #if DEBUG_XMP_WRITE
771     std::cerr << "  Nothing to output\n";
772 #endif
773     return std::string();
774 }
775 
776 
777 
778 std::string
encode_xmp(const ImageSpec & spec,bool minimal)779 encode_xmp(const ImageSpec& spec, bool minimal)
780 {
781     std::vector<std::pair<const XMPtag*, std::string>> list;
782     gather_xmp_attribs(spec, list);
783 
784     std::string xmp;
785 
786 #if 1
787     // This stuff seems to work
788     xmp += encode_xmp_category(list, "photoshop", "photoshop:", NULL, NULL,
789                                "http://ns.adobe.com/photoshop/1.0/", minimal,
790                                XMP_attribs);
791     xmp += encode_xmp_category(list, "xmp", "xmp:Rating", NULL, NULL,
792                                "http://ns.adobe.com/xap/1.0/", minimal,
793                                XMP_attribs);
794     xmp += encode_xmp_category(list, "xmp", "xmp:CreateDate", NULL, NULL,
795                                "http://ns.adobe.com/xap/1.0/", false,
796                                XMP_attribs);
797     xmp += encode_xmp_category(list, "xmp", "xmp:ModifyDate", NULL, NULL,
798                                "http://ns.adobe.com/xap/1.0/", false,
799                                XMP_attribs);
800     xmp += encode_xmp_category(list, "xmp", "xmp:MetadataDate", NULL, NULL,
801                                "http://ns.adobe.com/xap/1.0/", false,
802                                XMP_attribs);
803     xmp += encode_xmp_category(list, "xmpRights", "xmpRights:UsageTerms", NULL,
804                                "xmpRights:UsageTerms",
805                                "http://ns.adobe.com/xap/1.0/rights/", minimal,
806                                XMP_AltList);
807     xmp += encode_xmp_category(list, "xmpRights", "xmpRights:", NULL, NULL,
808                                "http://ns.adobe.com/xap/1.0/rights/", minimal,
809                                XMP_attribs);
810     xmp += encode_xmp_category(list, "dc", "dc:subject", NULL, "dc:subject",
811                                "http://purl.org/dc/elements/1.1/", minimal,
812                                XMP_BagList);
813     xmp += encode_xmp_category(list, "Iptc4xmpCore", "Iptc4xmpCore:SubjectCode",
814                                NULL, "Iptc4xmpCore:SubjectCode",
815                                "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/",
816                                false, XMP_BagList);
817     xmp += encode_xmp_category(list, "Iptc4xmpCore",
818                                "Iptc4xmpCore:", "Iptc4xmpCore:Ci", NULL,
819                                "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/",
820                                minimal, XMP_attribs);
821     xmp += encode_xmp_category(list, "Iptc4xmpCore", "Iptc4xmpCore:Ci", NULL,
822                                "Iptc4xmpCore:CreatorContactInfo",
823                                "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/",
824                                minimal, XMP_attribs);
825     xmp += encode_xmp_category(list, "Iptc4xmpCore", "Iptc4xmpCore:Scene", NULL,
826                                "Iptc4xmpCore:Scene",
827                                "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/",
828                                minimal, XMP_BagList);
829 
830     xmp += encode_xmp_category(list, "xmpMM", "xmpMM:", NULL, NULL,
831                                "http://ns.adobe.com/xap/1.0/mm/", minimal,
832                                XMP_attribs);
833     xmp += encode_xmp_category(list, "GPano", "GPano:", NULL, NULL,
834                                "http://ns.google.com/photos/1.0/panorama/",
835                                minimal, XMP_attribs);
836     xmp += encode_xmp_category(list, "crs", "crs:", NULL, NULL,
837                                "http://ns.adobe.com/camera-raw-settings/1.0/",
838                                minimal, XMP_attribs);
839 #endif
840 
841     xmp += encode_xmp_category(list, "xmp", "xmp:", NULL, NULL,
842                                "http://ns.adobe.com/xap/1.0/", minimal,
843                                XMP_nodes);
844 
845     xmp += encode_xmp_category(list, "tiff", "tiff:", NULL, NULL,
846                                "http://ns.adobe.com/tiff/1.0/", minimal,
847                                XMP_attribs);
848 #if 0
849     // Doesn't work yet
850     xmp += encode_xmp_category (list, "xapRights", "xapRights:", NULL, NULL,
851                                 "http://ns.adobe.com/xap/1.0/rights/", minimal, XMP_attribs);
852 //    xmp += encode_xmp_category (list, "dc", "dc:", NULL, NULL,
853 //                                "http://purl.org/dc/elements/1.1/", minimal, XMP_attribs);
854 
855 #endif
856 
857     // FIXME exif xmp stRef stVer stJob xmpDM
858 
859     if (!xmp.empty()) {
860         std::string head(
861             "<?xpacket begin=\"\xEF\xBB\xBF\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?> "
862             "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"Adobe XMP Core 5.5-c002 1.148022, 2012/07/15-18:06:45        \"> <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"> ");
863         std::string foot(" </rdf:RDF> </x:xmpmeta> <?xpacket end=\"w\"?>");
864         xmp = head + xmp + foot;
865     }
866 
867 
868 #if DEBUG_XMP_WRITE
869     std::cerr << "xmp to write = \n---\n" << xmp << "\n---\n";
870     std::cerr << "\n\nHere's what I still haven't output:\n";
871     for (size_t i = 0; i < list.size(); ++i)
872         std::cerr << list[i].first->xmpname << "\n";
873 #endif
874 
875     return xmp;
876 }
877 
878 
879 OIIO_NAMESPACE_END
880