1 /*=========================================================================
2 
3   Program:   Visualization Toolkit
4   Module:    vtkXMLUtilities.cxx
5 
6   Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
7   All rights reserved.
8   See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9 
10      This software is distributed WITHOUT ANY WARRANTY; without even
11      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12      PURPOSE.  See the above copyright notice for more information.
13 
14 =========================================================================*/
#include "vtkXMLUtilities.h"

#include "vtkObjectFactory.h"
#include "vtkXMLDataElement.h"
#include "vtkXMLDataParser.h"
#include "vtksys/FStream.hxx"

#include <sstream>
#include <string>

#if !defined(_WIN32) || defined(__CYGWIN__)
#include <unistd.h> /* unlink */
#else
#include <io.h> /* unlink */
#endif

#include <vector>
31 
// Growable list of element pointers. FindSimilarElements() uses it to collect
// matches before copying them into a flat array; the pointers are stored
// as-is (no reference is taken on the elements).
typedef std::vector<vtkXMLDataElement*> vtkXMLUtilitiesDataElementContainer;

// Standard VTK object-factory boilerplate for this class (presumably defines
// vtkXMLUtilities::New() -- see vtkObjectFactory.h).
vtkStandardNewMacro(vtkXMLUtilities);
35 
PrintSelf(ostream & os,vtkIndent indent)36 void vtkXMLUtilities::PrintSelf(ostream& os, vtkIndent indent)
37 {
38   this->Superclass::PrintSelf(os, indent);
39 }
40 
// Element names used by the factoring code in this file:
//  - FACTORED_POOL_NAME: the element FactorElements() adds to the tree to
//    hold every factored sub-tree;
//  - FACTORED_NAME: one entry of that pool, carrying an "Id" attribute and
//    a copy of the factored sub-tree as its nested element;
//  - FACTORED_REF_NAME: the placeholder that replaces each factored sub-tree,
//    pointing to its pool entry through the same "Id" attribute.
#define VTK_XML_UTILITIES_FACTORED_POOL_NAME "FactoredPool"
#define VTK_XML_UTILITIES_FACTORED_NAME "Factored"
#define VTK_XML_UTILITIES_FACTORED_REF_NAME "FactoredRef"
44 
45 //------------------------------------------------------------------------------
// Write the XML entity for `c` to `output` when `c` is one of the five
// characters handled here: & " ' < >.
// Returns 1 if an entity was written. Returns 0 if `c` is any other
// character, in which case nothing is written and it is up to the caller to
// emit the character itself.
inline int vtkXMLUtilitiesEncodeEntities(unsigned char c, ostream& output)
{
  const char* entity = nullptr;

  if (c == '&')
  {
    entity = "&amp;";
  }
  else if (c == '"')
  {
    entity = "&quot;";
  }
  else if (c == '\'')
  {
    entity = "&apos;";
  }
  else if (c == '<')
  {
    entity = "&lt;";
  }
  else if (c == '>')
  {
    entity = "&gt;";
  }

  if (!entity)
  {
    return 0;
  }

  output << entity;
  return 1;
}
73 
74 //------------------------------------------------------------------------------
EncodeString(const char * input,int input_encoding,ostream & output,int output_encoding,int special_entities)75 void vtkXMLUtilities::EncodeString(
76   const char* input, int input_encoding, ostream& output, int output_encoding, int special_entities)
77 {
78   // No string
79 
80   if (!input)
81   {
82     return;
83   }
84 
85   // If either the input or output encoding is not specified,
86   // or they are the same, dump as is (if no entities had to be converted)
87 
88   int no_input_encoding =
89     (input_encoding <= VTK_ENCODING_NONE || input_encoding >= VTK_ENCODING_UNKNOWN);
90 
91   int no_output_encoding =
92     (output_encoding <= VTK_ENCODING_NONE || output_encoding >= VTK_ENCODING_UNKNOWN);
93 
94   if (!special_entities &&
95     (no_input_encoding || no_output_encoding || input_encoding == output_encoding))
96   {
97     output << input;
98     return;
99   }
100 
101   // Convert
102 
103   const unsigned char* str = (const unsigned char*)input;
104 
105   // If either the input or output encoding is not specified, just process
106   // the entities
107 
108   if (no_input_encoding || no_output_encoding)
109   {
110     while (*str)
111     {
112       if (!vtkXMLUtilitiesEncodeEntities(*str, output))
113       {
114         output << *str;
115       }
116       str++;
117     }
118     return;
119   }
120 
121   // To VTK_UTF_8...
122 
123   if (output_encoding == VTK_ENCODING_UTF_8)
124   {
125     int from_iso_8859 =
126       (input_encoding >= VTK_ENCODING_ISO_8859_1 && input_encoding <= VTK_ENCODING_ISO_8859_16);
127 
128     // From ISO-8859 or US-ASCII
129 
130     if (input_encoding == VTK_ENCODING_US_ASCII || from_iso_8859)
131     {
132       while (*str)
133       {
134         if (!special_entities || !vtkXMLUtilitiesEncodeEntities(*str, output))
135         {
136           if (*str > 0x7F)
137           {
138 #if 0
139             // This should be the right implementation, but it seems that
140             // it just does not work for Expat. Brad and I should dig into
141             // that later, but it seems weird. In the meantime, just
142             // output the hex representation.
143 
144             output << "&#x"
145                    << hex << (0xC0 | (*str >> 6))
146                    << hex << (0x80 | (*str & 0x3F))
147                    << ';';
148 #else
149             output << "&#x" << hex << (int)(*str) << ';';
150 #endif
151           }
152           else if (*str < 30)
153           {
154             output << "&#x" << hex << (int)(*str) << ';';
155           }
156           else
157           {
158             output << *str;
159           }
160         }
161         str++;
162       }
163     }
164 
165     // From VTK_ENCODING_UTF_8 (i.e. just encode the entities)
166     // To be completed (need the whole &#x)
167 
168     else if (input_encoding == VTK_ENCODING_UTF_8)
169     {
170       while (*str)
171       {
172         if (!vtkXMLUtilitiesEncodeEntities(*str, output))
173         {
174           output << *str;
175         }
176         str++;
177       }
178     }
179 
180     // Unsupported input encoding
181 
182     else
183     {
184       vtkGenericWarningMacro(<< "Input encoding not supported (" << input_encoding << ")");
185     }
186   }
187 
188   // From VTK_ENCODING_UTF_8...
189 
190   else if (input_encoding == VTK_ENCODING_UTF_8)
191   {
192     int to_iso_8859 =
193       (output_encoding >= VTK_ENCODING_ISO_8859_1 && output_encoding <= VTK_ENCODING_ISO_8859_16);
194 
195     // To US-ASCII or ISO 8859
196 
197     if (output_encoding == VTK_ENCODING_US_ASCII || to_iso_8859)
198     {
199       while (*str)
200       {
201         if (!special_entities || !vtkXMLUtilitiesEncodeEntities(*str, output))
202         {
203           // Multi-byte 2-chars converted into one char
204 
205           if (*str > 0x7F)
206           {
207             output << (unsigned char)((*str << 6) | (str[1] & 0x3F));
208             str++;
209           }
210           else
211           {
212             output << *str;
213           }
214         }
215         str++;
216       }
217     }
218 
219     // Unsupported output encoding
220 
221     else
222     {
223       vtkGenericWarningMacro(<< "Output encoding not supported (" << input_encoding << ")");
224     }
225   }
226 }
227 
228 //------------------------------------------------------------------------------
CollateAttributes(vtkXMLDataElement * elem,ostream & os,const char * sep)229 void vtkXMLUtilities::CollateAttributes(vtkXMLDataElement* elem, ostream& os, const char* sep)
230 {
231   if (!elem)
232   {
233     return;
234   }
235 
236   int i, nb = elem->GetNumberOfAttributes();
237   for (i = 0; i < nb; i++)
238   {
239     const char* name = elem->GetAttributeName(i);
240     if (name)
241     {
242       const char* value = elem->GetAttribute(name);
243       if (value)
244       {
245         if (i)
246         {
247           os << (sep ? sep : " ");
248         }
249         os << name << "=\"";
250         vtkXMLUtilities::EncodeString(
251           value, elem->GetAttributeEncoding(), os, VTK_ENCODING_UTF_8, 1);
252         os << '\"';
253       }
254     }
255   }
256 }
257 
258 //------------------------------------------------------------------------------
FlattenElement(vtkXMLDataElement * elem,ostream & os,vtkIndent * indent,int indent_attributes)259 void vtkXMLUtilities::FlattenElement(
260   vtkXMLDataElement* elem, ostream& os, vtkIndent* indent, int indent_attributes)
261 {
262   if (!elem)
263   {
264     return;
265   }
266 
267   unsigned long pos = os.tellp();
268 
269   // Name
270 
271   if (indent)
272   {
273     os << *indent;
274   }
275 
276   os << '<' << elem->GetName();
277 
278   // Attributes
279 
280   if (elem->GetNumberOfAttributes())
281   {
282     os << ' ';
283     if (indent && indent_attributes)
284     {
285       unsigned long len = (unsigned long)os.tellp() - pos;
286       if (os.fail())
287       {
288         return;
289       }
290       char* sep = new char[1 + len + 1];
291       sep[0] = '\n';
292       memset(sep + 1, ' ', len);
293       sep[len + 1] = '\0';
294       vtkXMLUtilities::CollateAttributes(elem, os, sep);
295       delete[] sep;
296     }
297     else
298     {
299       vtkXMLUtilities::CollateAttributes(elem, os);
300     }
301   }
302 
303   const char* cdata = elem->GetCharacterData();
304   int nb_nested = elem->GetNumberOfNestedElements();
305   int need_close_tag = (nb_nested || cdata);
306 
307   if (!need_close_tag)
308   {
309     os << "/>";
310   }
311   else
312   {
313     os << '>';
314   }
315 
316   // cdata
317 
318   if (cdata)
319   {
320     vtkXMLUtilities::EncodeString(cdata, elem->GetAttributeEncoding(), os, VTK_ENCODING_UTF_8, 1);
321   }
322 
323   // Nested elements
324 
325   if (nb_nested)
326   {
327     if (indent)
328     {
329       os << '\n';
330     }
331     for (int i = 0; i < nb_nested; i++)
332     {
333       if (indent)
334       {
335         vtkIndent next_indent = indent->GetNextIndent();
336         vtkXMLUtilities::FlattenElement(elem->GetNestedElement(i), os, &next_indent);
337       }
338       else
339       {
340         vtkXMLUtilities::FlattenElement(elem->GetNestedElement(i), os);
341       }
342     }
343     if (indent)
344     {
345       os << *indent;
346     }
347   }
348 
349   // Close
350 
351   if (need_close_tag)
352   {
353     os << "</" << elem->GetName() << '>';
354   }
355 
356   if (indent)
357   {
358     os << '\n';
359   }
360 }
361 
362 //------------------------------------------------------------------------------
WriteElementToFile(vtkXMLDataElement * elem,const char * filename,vtkIndent * indent)363 int vtkXMLUtilities::WriteElementToFile(
364   vtkXMLDataElement* elem, const char* filename, vtkIndent* indent)
365 {
366   if (!elem || !filename)
367   {
368     return 0;
369   }
370 
371   vtksys::ofstream os(filename, ios::out);
372   vtkXMLUtilities::FlattenElement(elem, os, indent);
373 
374   os.flush();
375   if (os.fail())
376   {
377     os.close();
378     unlink(filename);
379     return 0;
380   }
381   return 1;
382 }
383 
384 //------------------------------------------------------------------------------
ReadElementFromStream(istream & is,int encoding)385 vtkXMLDataElement* vtkXMLUtilities::ReadElementFromStream(istream& is, int encoding)
386 {
387   vtkXMLDataElement* res = nullptr;
388   vtkXMLDataParser* xml_parser = vtkXMLDataParser::New();
389   xml_parser->SetAttributesEncoding(encoding);
390 
391   xml_parser->SetStream(&is);
392   if (xml_parser->Parse())
393   {
394     res = xml_parser->GetRootElement();
395     // Bump up the ref count since we are going to delete the parser
396     // which actually owns the element
397     res->SetReferenceCount(res->GetReferenceCount() + 1);
398     vtkXMLUtilities::UnFactorElements(res);
399   }
400 
401   xml_parser->Delete();
402   return res;
403 }
404 
405 //------------------------------------------------------------------------------
ReadElementFromString(const char * str,int encoding)406 vtkXMLDataElement* vtkXMLUtilities::ReadElementFromString(const char* str, int encoding)
407 {
408   if (!str)
409   {
410     return nullptr;
411   }
412 
413   std::stringstream strstr;
414   strstr << str;
415   vtkXMLDataElement* res = vtkXMLUtilities::ReadElementFromStream(strstr, encoding);
416 
417   return res;
418 }
419 
420 //------------------------------------------------------------------------------
ReadElementFromFile(const char * filename,int encoding)421 vtkXMLDataElement* vtkXMLUtilities::ReadElementFromFile(const char* filename, int encoding)
422 {
423   if (!filename)
424   {
425     return nullptr;
426   }
427 
428   vtksys::ifstream is(filename);
429   return vtkXMLUtilities::ReadElementFromStream(is, encoding);
430 }
431 
432 //------------------------------------------------------------------------------
ReadElementFromAttributeArray(vtkXMLDataElement * element,const char ** atts,int encoding)433 void vtkXMLUtilities::ReadElementFromAttributeArray(
434   vtkXMLDataElement* element, const char** atts, int encoding)
435 {
436   if (atts)
437   {
438     // If the target encoding is VTK_ENCODING_NONE or VTK_ENCODING_UNKNOWN,
439     // then keep the internal/default encoding, otherwise encode each
440     // attribute using that new format
441 
442     if (encoding != VTK_ENCODING_NONE && encoding != VTK_ENCODING_UNKNOWN)
443     {
444       element->SetAttributeEncoding(encoding);
445     }
446 
447     // Process each attributes returned by Expat in UTF-8 encoding, and
448     // convert them to our encoding
449 
450     for (int i = 0; atts[i] && atts[i + 1]; i += 2)
451     {
452       if (element->GetAttributeEncoding() == VTK_ENCODING_UTF_8)
453       {
454         element->SetAttribute(atts[i], atts[i + 1]);
455       }
456       else
457       {
458         std::ostringstream str;
459         vtkXMLUtilities::EncodeString(
460           atts[i + 1], VTK_ENCODING_UTF_8, str, element->GetAttributeEncoding(), 0);
461         str << ends;
462         element->SetAttribute(atts[i], str.str().c_str());
463       }
464     }
465   }
466 }
467 
468 //------------------------------------------------------------------------------
vtkXMLUtilitiesFindSimilarElementsInternal(vtkXMLDataElement * elem,vtkXMLDataElement * tree,vtkXMLUtilitiesDataElementContainer * results)469 static void vtkXMLUtilitiesFindSimilarElementsInternal(
470   vtkXMLDataElement* elem, vtkXMLDataElement* tree, vtkXMLUtilitiesDataElementContainer* results)
471 {
472   if (!elem || !tree || !results || elem == tree)
473   {
474     return;
475   }
476 
477   // If the element is equal to the current tree, append it to the
478   // results, otherwise check the sub-trees
479 
480   if (elem->IsEqualTo(tree))
481   {
482     results->push_back(tree);
483   }
484   else
485   {
486     for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
487     {
488       vtkXMLUtilitiesFindSimilarElementsInternal(elem, tree->GetNestedElement(i), results);
489     }
490   }
491 }
492 
493 //------------------------------------------------------------------------------
FindSimilarElements(vtkXMLDataElement * elem,vtkXMLDataElement * tree,vtkXMLDataElement *** results)494 int vtkXMLUtilities::FindSimilarElements(
495   vtkXMLDataElement* elem, vtkXMLDataElement* tree, vtkXMLDataElement*** results)
496 {
497   *results = nullptr;
498 
499   if (!elem || !tree)
500   {
501     return 0;
502   }
503 
504   // Create a data element container, and find all similar elements
505 
506   vtkXMLUtilitiesDataElementContainer* container = new vtkXMLUtilitiesDataElementContainer;
507 
508   vtkXMLUtilitiesFindSimilarElementsInternal(elem, tree, container);
509 
510   // If nothing was found, exit now
511 
512   int size = (int)container->size();
513   if (size)
514   {
515     // Allocate an array of element and copy the contents of the container
516     // to this flat structure
517 
518     *results = new vtkXMLDataElement*[size];
519 
520     size = 0;
521     for (vtkXMLUtilitiesDataElementContainer::const_iterator it = container->begin();
522          it != container->end(); ++it)
523     {
524       if (*it)
525       {
526         (*results)[size++] = *it;
527       }
528     }
529   }
530 
531   delete container;
532 
533   return size;
534 }
535 
536 //------------------------------------------------------------------------------
FactorElements(vtkXMLDataElement * tree)537 void vtkXMLUtilities::FactorElements(vtkXMLDataElement* tree)
538 {
539   if (!tree)
540   {
541     return;
542   }
543 
544   // Create the factored pool, and add it to the tree so that it can
545   // factor itself too
546 
547   vtkXMLDataElement* pool = vtkXMLDataElement::New();
548   pool->SetName(VTK_XML_UTILITIES_FACTORED_POOL_NAME);
549   pool->SetAttributeEncoding(tree->GetAttributeEncoding());
550   tree->AddNestedElement(pool);
551 
552   // Factor the tree, as long as some factorization has occurred
553   // (multiple pass might be needed because larger trees are factored
554   // first)
555 
556   while (vtkXMLUtilities::FactorElementsInternal(tree, tree, pool))
557   {
558   }
559 
560   // Nothing factored, remove the useless pool
561 
562   if (!pool->GetNumberOfNestedElements())
563   {
564     tree->RemoveNestedElement(pool);
565   }
566 
567   pool->Delete();
568 }
569 
570 //------------------------------------------------------------------------------
FactorElementsInternal(vtkXMLDataElement * tree,vtkXMLDataElement * root,vtkXMLDataElement * pool)571 int vtkXMLUtilities::FactorElementsInternal(
572   vtkXMLDataElement* tree, vtkXMLDataElement* root, vtkXMLDataElement* pool)
573 {
574   if (!tree || !root || !pool)
575   {
576     return 0;
577   }
578 
579   // Do not bother factoring something already factored
580 
581   if (tree->GetName() && !strcmp(tree->GetName(), VTK_XML_UTILITIES_FACTORED_REF_NAME))
582   {
583     return 0;
584   }
585 
586   // Try to find all trees similar to the current tree
587 
588   vtkXMLDataElement** similar_trees;
589   int nb_of_similar_trees = vtkXMLUtilities::FindSimilarElements(tree, root, &similar_trees);
590 
591   // None was found, try to factor the sub-trees
592 
593   if (!nb_of_similar_trees)
594   {
595     int res = 0;
596     for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
597     {
598       res += vtkXMLUtilities::FactorElementsInternal(tree->GetNestedElement(i), root, pool);
599     }
600     return res ? 1 : 0;
601   }
602 
603   // Otherwise replace those trees with factored refs
604 
605   char buffer[5];
606   snprintf(buffer, sizeof(buffer), "%02d_", pool->GetNumberOfNestedElements());
607 
608   std::ostringstream id;
609   id << buffer << tree->GetName();
610 
611   vtkXMLDataElement* factored = vtkXMLDataElement::New();
612   factored->SetName(VTK_XML_UTILITIES_FACTORED_NAME);
613   factored->SetAttributeEncoding(pool->GetAttributeEncoding());
614   factored->SetAttribute("Id", id.str().c_str());
615   pool->AddNestedElement(factored);
616   factored->Delete();
617 
618   vtkXMLDataElement* tree_copy = vtkXMLDataElement::New();
619   tree_copy->DeepCopy(tree);
620   factored->AddNestedElement(tree_copy);
621   tree_copy->Delete();
622 
623   for (int i = 0; i < nb_of_similar_trees; i++)
624   {
625     similar_trees[i]->RemoveAllAttributes();
626     similar_trees[i]->RemoveAllNestedElements();
627     similar_trees[i]->SetCharacterData(nullptr, 0);
628     similar_trees[i]->SetName(VTK_XML_UTILITIES_FACTORED_REF_NAME);
629     similar_trees[i]->SetAttribute("Id", id.str().c_str());
630   }
631 
632   tree->RemoveAllAttributes();
633   tree->RemoveAllNestedElements();
634   tree->SetCharacterData(nullptr, 0);
635   tree->SetName(VTK_XML_UTILITIES_FACTORED_REF_NAME);
636   tree->SetAttribute("Id", id.str().c_str());
637 
638   delete[] similar_trees;
639 
640   return 1;
641 }
642 
643 //------------------------------------------------------------------------------
UnFactorElements(vtkXMLDataElement * tree)644 void vtkXMLUtilities::UnFactorElements(vtkXMLDataElement* tree)
645 {
646   if (!tree)
647   {
648     return;
649   }
650 
651   // Search for the factored pool, if not found, we are done
652 
653   vtkXMLDataElement* pool = tree->FindNestedElementWithName(VTK_XML_UTILITIES_FACTORED_POOL_NAME);
654   if (!pool)
655   {
656     return;
657   }
658 
659   // Remove the pool from the tree, because it makes no sense
660   // unfactoring it too
661 
662   pool->Register(tree);
663   tree->RemoveNestedElement(pool);
664 
665   // Unfactor the tree
666 
667   vtkXMLUtilities::UnFactorElementsInternal(tree, pool);
668 
669   // Remove the useless empty pool
670 
671   pool->UnRegister(tree);
672 }
673 
674 //------------------------------------------------------------------------------
UnFactorElementsInternal(vtkXMLDataElement * tree,vtkXMLDataElement * pool)675 int vtkXMLUtilities::UnFactorElementsInternal(vtkXMLDataElement* tree, vtkXMLDataElement* pool)
676 {
677   if (!tree || !pool)
678   {
679     return 0;
680   }
681 
682   int res = 0;
683 
684   // We found a factor, replace it with the corresponding sub-tree
685 
686   if (tree->GetName() && !strcmp(tree->GetName(), VTK_XML_UTILITIES_FACTORED_REF_NAME))
687   {
688     vtkXMLDataElement* original_tree = pool->FindNestedElementWithNameAndAttribute(
689       VTK_XML_UTILITIES_FACTORED_NAME, "Id", tree->GetAttribute("Id"));
690     if (original_tree && original_tree->GetNumberOfNestedElements())
691     {
692       tree->DeepCopy(original_tree->GetNestedElement(0));
693       res++;
694     }
695   }
696 
697   // Now try to unfactor the sub-trees
698 
699   for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
700   {
701     res += vtkXMLUtilities::UnFactorElementsInternal(tree->GetNestedElement(i), pool);
702   }
703 
704   return res ? 1 : 0;
705 }
706