1 /*=========================================================================
2 
3   Program:   Visualization Toolkit
4   Module:    vtkXMLUtilities.cxx
5 
6   Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
7   All rights reserved.
8   See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9 
10      This software is distributed WITHOUT ANY WARRANTY; without even
11      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12      PURPOSE.  See the above copyright notice for more information.
13 
14 =========================================================================*/
15 #include "vtkXMLUtilities.h"
16 
17 #include "vtkObjectFactory.h"
18 #include "vtkXMLDataElement.h"
19 #include "vtkXMLDataParser.h"
20 
21 #include <sstream>
22 
23 #if !defined(_WIN32) || defined(__CYGWIN__)
24 # include <unistd.h> /* unlink */
25 #else
26 # include <io.h> /* unlink */
27 #endif
28 
29 #include <vector>
30 
31 typedef std::vector<vtkXMLDataElement*> vtkXMLUtilitiesDataElementContainer;
32 
33 vtkStandardNewMacro(vtkXMLUtilities);
34 
35 #define  VTK_XML_UTILITIES_FACTORED_POOL_NAME "FactoredPool"
36 #define  VTK_XML_UTILITIES_FACTORED_NAME      "Factored"
37 #define  VTK_XML_UTILITIES_FACTORED_REF_NAME  "FactoredRef"
38 
39 //----------------------------------------------------------------------------
// Write the XML entity for 'c' to 'output' if 'c' is one of the five
// characters handled here: & " ' < >
// Returns 1 when an entity was written. Returns 0 (and writes nothing) for
// any other character, in which case the caller has to emit 'c' itself.
inline int vtkXMLUtilitiesEncodeEntities(unsigned char c, ostream &output)
{
  const char *entity = nullptr;

  if (c == '&')
  {
    entity = "&amp;";
  }
  else if (c == '"')
  {
    entity = "&quot;";
  }
  else if (c == '\'')
  {
    entity = "&apos;";
  }
  else if (c == '<')
  {
    entity = "&lt;";
  }
  else if (c == '>')
  {
    entity = "&gt;";
  }

  if (!entity)
  {
    return 0;
  }

  output << entity;
  return 1;
}
67 
68 //----------------------------------------------------------------------------
EncodeString(const char * input,int input_encoding,ostream & output,int output_encoding,int special_entities)69 void vtkXMLUtilities::EncodeString(const char *input, int input_encoding,
70                                    ostream &output, int output_encoding,
71                                    int special_entities)
72 {
73   // No string
74 
75   if (!input)
76   {
77     return;
78   }
79 
80   // If either the input or output encoding is not specified,
81   // or they are the same, dump as is (if no entities had to be converted)
82 
83   int no_input_encoding = (input_encoding <= VTK_ENCODING_NONE ||
84                            input_encoding >= VTK_ENCODING_UNKNOWN);
85 
86   int no_output_encoding = (output_encoding <= VTK_ENCODING_NONE ||
87                             output_encoding >= VTK_ENCODING_UNKNOWN);
88 
89   if (!special_entities &&
90       (no_input_encoding || no_output_encoding ||
91        input_encoding == output_encoding))
92   {
93     output << input;
94     return;
95   }
96 
97   // Convert
98 
99   const unsigned char *str = (const unsigned char*)input;
100 
101   // If either the input or output encoding is not specified, just process
102   // the entities
103 
104   if (no_input_encoding || no_output_encoding)
105   {
106     while (*str)
107     {
108       if (!vtkXMLUtilitiesEncodeEntities(*str, output))
109       {
110         output << *str;
111       }
112       str++;
113     }
114     return;
115   }
116 
117   // To VTK_UTF_8...
118 
119   if (output_encoding == VTK_ENCODING_UTF_8)
120   {
121     int from_iso_8859 = (input_encoding >= VTK_ENCODING_ISO_8859_1 &&
122                          input_encoding <= VTK_ENCODING_ISO_8859_16);
123 
124     // From ISO-8859 or US-ASCII
125 
126     if (input_encoding == VTK_ENCODING_US_ASCII || from_iso_8859)
127     {
128       while (*str)
129       {
130         if (!special_entities || !vtkXMLUtilitiesEncodeEntities(*str, output))
131         {
132           if (*str > 0x7F)
133           {
134 #if 0
135             // This should be the right implementation, but it seems that
136             // it just does not work for Expat. Brad and I should dig into
137             // that later, but it seems weird. In the meantime, just
138             // output the hex representation.
139 
140             output << "&#x"
141                    << hex << (0xC0 | (*str >> 6))
142                    << hex << (0x80 | (*str & 0x3F))
143                    << ';';
144 #else
145             output << "&#x" << hex << (int)(*str) << ';';
146 #endif
147           }
148           else if (*str < 30)
149           {
150             output << "&#x" << hex << (int)(*str) << ';';
151           }
152           else
153           {
154             output << *str;
155           }
156         }
157         str++;
158       }
159     }
160 
161     // From VTK_ENCODING_UTF_8 (i.e. just encode the entities)
162     // To be completed (need the whole &#x)
163 
164     else if (input_encoding == VTK_ENCODING_UTF_8)
165     {
166       while (*str)
167       {
168         if (!vtkXMLUtilitiesEncodeEntities(*str, output))
169         {
170           output << *str;
171         }
172         str++;
173       }
174     }
175 
176     // Unsupported input encoding
177 
178     else
179     {
180       vtkGenericWarningMacro(
181         << "Input encoding not supported (" << input_encoding << ")");
182     }
183   }
184 
185   // From VTK_ENCODING_UTF_8...
186 
187   else if (input_encoding == VTK_ENCODING_UTF_8)
188   {
189     int to_iso_8859 = (output_encoding >= VTK_ENCODING_ISO_8859_1 &&
190                        output_encoding <=VTK_ENCODING_ISO_8859_16);
191 
192     // To US-ASCII or ISO 8859
193 
194     if (output_encoding == VTK_ENCODING_US_ASCII || to_iso_8859)
195     {
196       while (*str)
197       {
198         if (!special_entities || !vtkXMLUtilitiesEncodeEntities(*str, output))
199         {
200           // Multi-byte 2-chars converted into one char
201 
202           if (*str > 0x7F)
203           {
204             output << (unsigned char)((*str << 6) | (str[1] & 0x3F));
205             str++;
206           }
207           else
208           {
209             output << *str;
210           }
211         }
212         str++;
213       }
214     }
215 
216     // Unsupported output encoding
217 
218     else
219     {
220       vtkGenericWarningMacro(
221         << "Output encoding not supported (" << input_encoding << ")");
222     }
223   }
224 }
225 
226 //----------------------------------------------------------------------------
CollateAttributes(vtkXMLDataElement * elem,ostream & os,const char * sep)227 void vtkXMLUtilities::CollateAttributes(vtkXMLDataElement *elem,
228                                         ostream &os,
229                                         const char *sep)
230 {
231   if (!elem)
232   {
233     return;
234   }
235 
236   int i, nb = elem->GetNumberOfAttributes();
237   for (i = 0; i < nb; i++)
238   {
239     const char *name = elem->GetAttributeName(i);
240     if (name)
241     {
242       const char *value = elem->GetAttribute(name);
243       if (value)
244       {
245         if (i)
246         {
247           os << (sep ? sep : " ");
248         }
249         os << name << "=\"";
250         vtkXMLUtilities::EncodeString(
251           value, elem->GetAttributeEncoding(), os, VTK_ENCODING_UTF_8, 1);
252         os << '\"';
253       }
254     }
255   }
256 }
257 
258 //----------------------------------------------------------------------------
FlattenElement(vtkXMLDataElement * elem,ostream & os,vtkIndent * indent,int indent_attributes)259 void vtkXMLUtilities::FlattenElement(vtkXMLDataElement *elem,
260                                      ostream &os,
261                                      vtkIndent *indent,
262                                      int indent_attributes)
263 {
264   if (!elem)
265   {
266     return;
267   }
268 
269   unsigned long pos = os.tellp();
270 
271   // Name
272 
273   if (indent)
274   {
275     os << *indent;
276   }
277 
278   os << '<' << elem->GetName();
279 
280   // Attributes
281 
282   if (elem->GetNumberOfAttributes())
283   {
284     os << ' ';
285     if (indent && indent_attributes)
286     {
287       unsigned long len = (unsigned long)os.tellp() - pos;
288       if (os.fail())
289       {
290         return;
291       }
292       char *sep = new char [1 + len + 1];
293       sep[0] = '\n';
294       memset(sep + 1, ' ', len);
295       sep[len + 1] = '\0';
296       vtkXMLUtilities::CollateAttributes(elem, os, sep);
297       delete [] sep;
298     }
299     else
300     {
301       vtkXMLUtilities::CollateAttributes(elem, os);
302     }
303   }
304 
305   const char *cdata = elem->GetCharacterData();
306   int nb_nested = elem->GetNumberOfNestedElements();
307   int need_close_tag = (nb_nested || cdata);
308 
309   if (!need_close_tag)
310   {
311     os << "/>";
312   }
313   else
314   {
315     os << '>';
316   }
317 
318   // cdata
319 
320   if (cdata)
321   {
322     vtkXMLUtilities::EncodeString(
323       cdata, elem->GetAttributeEncoding(), os, VTK_ENCODING_UTF_8, 1);
324   }
325 
326   // Nested elements
327 
328   if (nb_nested)
329   {
330     if (indent)
331     {
332       os << '\n';
333     }
334     for (int i = 0; i < nb_nested; i++)
335     {
336       if (indent)
337       {
338         vtkIndent next_indent = indent->GetNextIndent();
339         vtkXMLUtilities::FlattenElement(elem->GetNestedElement(i),
340                                         os, &next_indent);
341       }
342       else
343       {
344         vtkXMLUtilities::FlattenElement(elem->GetNestedElement(i), os);
345       }
346     }
347     if (indent)
348     {
349       os << *indent;
350     }
351   }
352 
353   // Close
354 
355   if (need_close_tag)
356   {
357     os << "</" << elem->GetName() << '>';
358   }
359 
360   if (indent)
361   {
362     os << '\n';
363   }
364 }
365 
366 //----------------------------------------------------------------------------
WriteElementToFile(vtkXMLDataElement * elem,const char * filename,vtkIndent * indent)367 int vtkXMLUtilities::WriteElementToFile(vtkXMLDataElement *elem,
368                                         const char *filename,
369                                         vtkIndent *indent)
370 {
371   if (!elem || !filename)
372   {
373     return 0;
374   }
375 
376   ofstream os(filename, ios::out);
377   vtkXMLUtilities::FlattenElement(elem, os, indent);
378 
379   os.flush();
380   if (os.fail())
381   {
382     os.close();
383     unlink(filename);
384     return 0;
385   }
386   return 1;
387 }
388 
389 //----------------------------------------------------------------------------
390 vtkXMLDataElement*
ReadElementFromStream(istream & is,int encoding)391 vtkXMLUtilities::ReadElementFromStream(istream &is, int encoding)
392 {
393   vtkXMLDataElement *res = nullptr;
394   vtkXMLDataParser* xml_parser = vtkXMLDataParser::New();
395   xml_parser->SetAttributesEncoding(encoding);
396 
397   xml_parser->SetStream(&is);
398   if (xml_parser->Parse())
399   {
400     res = xml_parser->GetRootElement();
401     // Bump up the ref count since we are going to delete the parser
402     // which actually owns the element
403     res->SetReferenceCount(res->GetReferenceCount() + 1);
404     vtkXMLUtilities::UnFactorElements(res);
405   }
406 
407   xml_parser->Delete();
408   return res;
409 }
410 
411 //----------------------------------------------------------------------------
412 vtkXMLDataElement*
ReadElementFromString(const char * str,int encoding)413 vtkXMLUtilities::ReadElementFromString(const char *str, int encoding)
414 {
415   if (!str)
416   {
417     return nullptr;
418   }
419 
420   std::stringstream strstr;
421   strstr << str;
422   vtkXMLDataElement *res =
423     vtkXMLUtilities::ReadElementFromStream(strstr, encoding);
424 
425   return res;
426 }
427 
428 //----------------------------------------------------------------------------
429 vtkXMLDataElement*
ReadElementFromFile(const char * filename,int encoding)430 vtkXMLUtilities::ReadElementFromFile(const char *filename, int encoding)
431 {
432   if (!filename)
433   {
434     return nullptr;
435   }
436 
437   ifstream is(filename);
438   return vtkXMLUtilities::ReadElementFromStream(is, encoding);
439 }
440 
441 //----------------------------------------------------------------------------
ReadElementFromAttributeArray(vtkXMLDataElement * element,const char ** atts,int encoding)442 void vtkXMLUtilities::ReadElementFromAttributeArray(
443         vtkXMLDataElement *element,
444         const char** atts,
445         int encoding)
446 {
447   if(atts)
448   {
449     // If the target encoding is VTK_ENCODING_NONE or VTK_ENCODING_UNKNOWN,
450     // then keep the internal/default encoding, otherwise encode each
451     // attribute using that new format
452 
453     if (encoding != VTK_ENCODING_NONE && encoding != VTK_ENCODING_UNKNOWN)
454     {
455       element->SetAttributeEncoding(encoding);
456     }
457 
458     // Process each attributes returned by Expat in UTF-8 encoding, and
459     // convert them to our encoding
460 
461     for (int i = 0; atts[i] && atts[i + 1]; i += 2)
462     {
463       if (element->GetAttributeEncoding() == VTK_ENCODING_UTF_8)
464       {
465         element->SetAttribute(atts[i], atts[i + 1]);
466       }
467       else
468       {
469         std::ostringstream str;
470         vtkXMLUtilities::EncodeString(
471           atts[i+1], VTK_ENCODING_UTF_8, str, element->GetAttributeEncoding(), 0);
472         str << ends;
473         element->SetAttribute(atts[i], str.str().c_str());
474       }
475     }
476   }
477 }
478 
479 //----------------------------------------------------------------------------
vtkXMLUtilitiesFindSimilarElementsInternal(vtkXMLDataElement * elem,vtkXMLDataElement * tree,vtkXMLUtilitiesDataElementContainer * results)480 static void vtkXMLUtilitiesFindSimilarElementsInternal(
481   vtkXMLDataElement *elem,
482   vtkXMLDataElement *tree,
483   vtkXMLUtilitiesDataElementContainer *results)
484 {
485   if (!elem || !tree || !results || elem == tree)
486   {
487     return;
488   }
489 
490   // If the element is equal to the current tree, append it to the
491   // results, otherwise check the sub-trees
492 
493   if (elem->IsEqualTo(tree))
494   {
495     results->push_back(tree);
496   }
497   else
498   {
499     for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
500     {
501       vtkXMLUtilitiesFindSimilarElementsInternal(
502         elem, tree->GetNestedElement(i), results);
503     }
504   }
505 }
506 
507 //----------------------------------------------------------------------------
FindSimilarElements(vtkXMLDataElement * elem,vtkXMLDataElement * tree,vtkXMLDataElement *** results)508 int vtkXMLUtilities::FindSimilarElements(vtkXMLDataElement *elem,
509                                          vtkXMLDataElement *tree,
510                                          vtkXMLDataElement ***results)
511 {
512   *results = nullptr;
513 
514   if (!elem || ! tree)
515   {
516     return 0;
517   }
518 
519   // Create a data element container, and find all similar elements
520 
521   vtkXMLUtilitiesDataElementContainer *container =
522     new vtkXMLUtilitiesDataElementContainer;
523 
524   vtkXMLUtilitiesFindSimilarElementsInternal(elem, tree, container);
525 
526   // If nothing was found, exit now
527 
528   int size = (int)container->size();
529   if (size)
530   {
531     // Allocate an array of element and copy the contents of the container
532     // to this flat structure
533 
534     *results = new vtkXMLDataElement* [size];
535 
536     size = 0;
537     for (vtkXMLUtilitiesDataElementContainer::const_iterator
538            it = container->begin(); it != container->end(); ++it)
539     {
540       if (*it)
541       {
542         (*results)[size++] = *it;
543       }
544     }
545   }
546 
547   delete container;
548 
549   return size;
550 }
551 
552 //----------------------------------------------------------------------------
FactorElements(vtkXMLDataElement * tree)553 void vtkXMLUtilities::FactorElements(vtkXMLDataElement *tree)
554 {
555   if (!tree)
556   {
557     return;
558   }
559 
560   // Create the factored pool, and add it to the tree so that it can
561   // factor itself too
562 
563   vtkXMLDataElement *pool = vtkXMLDataElement::New();
564   pool->SetName(VTK_XML_UTILITIES_FACTORED_POOL_NAME);
565   pool->SetAttributeEncoding(tree->GetAttributeEncoding());
566   tree->AddNestedElement(pool);
567 
568   // Factor the tree, as long as some factorization has occurred
569   // (multiple pass might be needed because larger trees are factored
570   // first)
571 
572   while (vtkXMLUtilities::FactorElementsInternal(tree, tree, pool)) {};
573 
574   // Nothing factored, remove the useless pool
575 
576   if (!pool->GetNumberOfNestedElements())
577   {
578     tree->RemoveNestedElement(pool);
579   }
580 
581   pool->Delete();
582 }
583 
584 //----------------------------------------------------------------------------
FactorElementsInternal(vtkXMLDataElement * tree,vtkXMLDataElement * root,vtkXMLDataElement * pool)585 int vtkXMLUtilities::FactorElementsInternal(vtkXMLDataElement *tree,
586                                             vtkXMLDataElement *root,
587                                             vtkXMLDataElement *pool)
588 {
589   if (!tree || !root || !pool)
590   {
591     return 0;
592   }
593 
594   // Do not bother factoring something already factored
595 
596   if (tree->GetName() &&
597       !strcmp(tree->GetName(), VTK_XML_UTILITIES_FACTORED_REF_NAME))
598   {
599     return 0;
600   }
601 
602   // Try to find all trees similar to the current tree
603 
604   vtkXMLDataElement **similar_trees;
605   int nb_of_similar_trees = vtkXMLUtilities::FindSimilarElements(
606     tree, root, &similar_trees);
607 
608   // None was found, try to factor the sub-trees
609 
610   if (!nb_of_similar_trees)
611   {
612     int res = 0;
613     for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
614     {
615       res += vtkXMLUtilities::FactorElementsInternal(
616         tree->GetNestedElement(i), root, pool);
617     }
618     return res ? 1 : 0;
619   }
620 
621   // Otherwise replace those trees with factored refs
622 
623   char buffer[5];
624   snprintf(buffer, sizeof(buffer), "%02d_", pool->GetNumberOfNestedElements());
625 
626   std::ostringstream id;
627   id << buffer << tree->GetName();
628 
629   vtkXMLDataElement *factored = vtkXMLDataElement::New();
630   factored->SetName(VTK_XML_UTILITIES_FACTORED_NAME);
631   factored->SetAttributeEncoding(pool->GetAttributeEncoding());
632   factored->SetAttribute("Id", id.str().c_str());
633   pool->AddNestedElement(factored);
634   factored->Delete();
635 
636   vtkXMLDataElement *tree_copy = vtkXMLDataElement::New();
637   tree_copy->DeepCopy(tree);
638   factored->AddNestedElement(tree_copy);
639   tree_copy->Delete();
640 
641   for (int i = 0; i < nb_of_similar_trees; i++)
642   {
643     similar_trees[i]->RemoveAllAttributes();
644     similar_trees[i]->RemoveAllNestedElements();
645     similar_trees[i]->SetCharacterData(nullptr, 0);
646     similar_trees[i]->SetName(VTK_XML_UTILITIES_FACTORED_REF_NAME);
647     similar_trees[i]->SetAttribute("Id", id.str().c_str());
648   }
649 
650   tree->RemoveAllAttributes();
651   tree->RemoveAllNestedElements();
652   tree->SetCharacterData(nullptr, 0);
653   tree->SetName(VTK_XML_UTILITIES_FACTORED_REF_NAME);
654   tree->SetAttribute("Id", id.str().c_str());
655 
656   delete [] similar_trees;
657 
658   return 1;
659 }
660 
661 //----------------------------------------------------------------------------
UnFactorElements(vtkXMLDataElement * tree)662 void vtkXMLUtilities::UnFactorElements(vtkXMLDataElement *tree)
663 {
664   if (!tree)
665   {
666     return;
667   }
668 
669   // Search for the factored pool, if not found, we are done
670 
671   vtkXMLDataElement *pool = tree->FindNestedElementWithName(
672     VTK_XML_UTILITIES_FACTORED_POOL_NAME);
673   if (!pool)
674   {
675     return;
676   }
677 
678   // Remove the pool from the tree, because it makes no sense
679   // unfactoring it too
680 
681   pool->Register(tree);
682   tree->RemoveNestedElement(pool);
683 
684   // Unfactor the tree
685 
686   vtkXMLUtilities::UnFactorElementsInternal(tree, pool);
687 
688   // Remove the useless empty pool
689 
690   pool->UnRegister(tree);
691 }
692 
693 //----------------------------------------------------------------------------
UnFactorElementsInternal(vtkXMLDataElement * tree,vtkXMLDataElement * pool)694 int vtkXMLUtilities::UnFactorElementsInternal(vtkXMLDataElement *tree,
695                                               vtkXMLDataElement *pool)
696 {
697   if (!tree || !pool)
698   {
699     return 0;
700   }
701 
702   int res = 0;
703 
704   // We found a factor, replace it with the corresponding sub-tree
705 
706   if (tree->GetName() &&
707       !strcmp(tree->GetName(), VTK_XML_UTILITIES_FACTORED_REF_NAME))
708   {
709     vtkXMLDataElement *original_tree =
710       pool->FindNestedElementWithNameAndAttribute(
711         VTK_XML_UTILITIES_FACTORED_NAME, "Id", tree->GetAttribute("Id"));
712     if (original_tree && original_tree->GetNumberOfNestedElements())
713     {
714       tree->DeepCopy(original_tree->GetNestedElement(0));
715       res++;
716     }
717   }
718 
719   // Now try to unfactor the sub-trees
720 
721   for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
722   {
723     res += vtkXMLUtilities::UnFactorElementsInternal(
724       tree->GetNestedElement(i), pool);
725   }
726 
727   return res ? 1 : 0;
728 }
729