1 /*=========================================================================
2
3 Program: Visualization Toolkit
4 Module: vtkXMLUtilities.cxx
5
6 Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
7 All rights reserved.
8 See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9
10 This software is distributed WITHOUT ANY WARRANTY; without even
11 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12 PURPOSE. See the above copyright notice for more information.
13
14 =========================================================================*/
#include "vtkXMLUtilities.h"

#include "vtkObjectFactory.h"
#include "vtkXMLDataElement.h"
#include "vtkXMLDataParser.h"
#include "vtksys/FStream.hxx"

#include <ios>
#include <sstream>
#include <string>

#if !defined(_WIN32) || defined(__CYGWIN__)
#include <unistd.h> /* unlink */
#else
#include <io.h> /* unlink */
#endif

#include <vector>
31
32 typedef std::vector<vtkXMLDataElement*> vtkXMLUtilitiesDataElementContainer;
33
34 vtkStandardNewMacro(vtkXMLUtilities);
35
PrintSelf(ostream & os,vtkIndent indent)36 void vtkXMLUtilities::PrintSelf(ostream& os, vtkIndent indent)
37 {
38 this->Superclass::PrintSelf(os, indent);
39 }
40
// Element names used by the factoring/unfactoring functions of this file:
// - FACTORED_POOL_NAME: the element added to a tree to hold factored sub-trees,
// - FACTORED_NAME: one entry of that pool, carrying an "Id" attribute and
//   wrapping a copy of a factored sub-tree,
// - FACTORED_REF_NAME: the placeholder (also carrying "Id") that replaces a
//   factored sub-tree inside the tree.
#define VTK_XML_UTILITIES_FACTORED_POOL_NAME "FactoredPool"
#define VTK_XML_UTILITIES_FACTORED_NAME "Factored"
#define VTK_XML_UTILITIES_FACTORED_REF_NAME "FactoredRef"
44
45 //------------------------------------------------------------------------------
// Write the XML predefined entity for 'c' to 'output' when 'c' is one of the
// five characters XML reserves: ampersand, double quote, apostrophe,
// less-than and greater-than.
//
// Returns 1 if an entity was written, 0 if 'c' is not one of those characters;
// in the latter case nothing is written and the caller is expected to emit
// the character itself.
//
// Each entity literal is deliberately split into two adjacent string literals
// (the compiler concatenates them). Written that way the source contains no
// complete entity sequence, so a tool that decodes entities in source text
// cannot silently turn the escape back into the raw character.
inline int vtkXMLUtilitiesEncodeEntities(unsigned char c, ostream& output)
{
  const char* entity = nullptr;

  switch (c)
  {
    case '&':
      entity = "&" "amp;";
      break;

    case '"':
      entity = "&" "quot;";
      break;

    case '\'':
      entity = "&" "apos;";
      break;

    case '<':
      entity = "&" "lt;";
      break;

    case '>':
      entity = "&" "gt;";
      break;

    default:
      return 0;
  }

  output << entity;
  return 1;
}
73
74 //------------------------------------------------------------------------------
EncodeString(const char * input,int input_encoding,ostream & output,int output_encoding,int special_entities)75 void vtkXMLUtilities::EncodeString(
76 const char* input, int input_encoding, ostream& output, int output_encoding, int special_entities)
77 {
78 // No string
79
80 if (!input)
81 {
82 return;
83 }
84
85 // If either the input or output encoding is not specified,
86 // or they are the same, dump as is (if no entities had to be converted)
87
88 int no_input_encoding =
89 (input_encoding <= VTK_ENCODING_NONE || input_encoding >= VTK_ENCODING_UNKNOWN);
90
91 int no_output_encoding =
92 (output_encoding <= VTK_ENCODING_NONE || output_encoding >= VTK_ENCODING_UNKNOWN);
93
94 if (!special_entities &&
95 (no_input_encoding || no_output_encoding || input_encoding == output_encoding))
96 {
97 output << input;
98 return;
99 }
100
101 // Convert
102
103 const unsigned char* str = (const unsigned char*)input;
104
105 // If either the input or output encoding is not specified, just process
106 // the entities
107
108 if (no_input_encoding || no_output_encoding)
109 {
110 while (*str)
111 {
112 if (!vtkXMLUtilitiesEncodeEntities(*str, output))
113 {
114 output << *str;
115 }
116 str++;
117 }
118 return;
119 }
120
121 // To VTK_UTF_8...
122
123 if (output_encoding == VTK_ENCODING_UTF_8)
124 {
125 int from_iso_8859 =
126 (input_encoding >= VTK_ENCODING_ISO_8859_1 && input_encoding <= VTK_ENCODING_ISO_8859_16);
127
128 // From ISO-8859 or US-ASCII
129
130 if (input_encoding == VTK_ENCODING_US_ASCII || from_iso_8859)
131 {
132 while (*str)
133 {
134 if (!special_entities || !vtkXMLUtilitiesEncodeEntities(*str, output))
135 {
136 if (*str > 0x7F)
137 {
138 #if 0
139 // This should be the right implementation, but it seems that
140 // it just does not work for Expat. Brad and I should dig into
141 // that later, but it seems weird. In the meantime, just
142 // output the hex representation.
143
144 output << "&#x"
145 << hex << (0xC0 | (*str >> 6))
146 << hex << (0x80 | (*str & 0x3F))
147 << ';';
148 #else
149 output << "&#x" << hex << (int)(*str) << ';';
150 #endif
151 }
152 else if (*str < 30)
153 {
154 output << "&#x" << hex << (int)(*str) << ';';
155 }
156 else
157 {
158 output << *str;
159 }
160 }
161 str++;
162 }
163 }
164
165 // From VTK_ENCODING_UTF_8 (i.e. just encode the entities)
166 // To be completed (need the whole &#x)
167
168 else if (input_encoding == VTK_ENCODING_UTF_8)
169 {
170 while (*str)
171 {
172 if (!vtkXMLUtilitiesEncodeEntities(*str, output))
173 {
174 output << *str;
175 }
176 str++;
177 }
178 }
179
180 // Unsupported input encoding
181
182 else
183 {
184 vtkGenericWarningMacro(<< "Input encoding not supported (" << input_encoding << ")");
185 }
186 }
187
188 // From VTK_ENCODING_UTF_8...
189
190 else if (input_encoding == VTK_ENCODING_UTF_8)
191 {
192 int to_iso_8859 =
193 (output_encoding >= VTK_ENCODING_ISO_8859_1 && output_encoding <= VTK_ENCODING_ISO_8859_16);
194
195 // To US-ASCII or ISO 8859
196
197 if (output_encoding == VTK_ENCODING_US_ASCII || to_iso_8859)
198 {
199 while (*str)
200 {
201 if (!special_entities || !vtkXMLUtilitiesEncodeEntities(*str, output))
202 {
203 // Multi-byte 2-chars converted into one char
204
205 if (*str > 0x7F)
206 {
207 output << (unsigned char)((*str << 6) | (str[1] & 0x3F));
208 str++;
209 }
210 else
211 {
212 output << *str;
213 }
214 }
215 str++;
216 }
217 }
218
219 // Unsupported output encoding
220
221 else
222 {
223 vtkGenericWarningMacro(<< "Output encoding not supported (" << input_encoding << ")");
224 }
225 }
226 }
227
228 //------------------------------------------------------------------------------
CollateAttributes(vtkXMLDataElement * elem,ostream & os,const char * sep)229 void vtkXMLUtilities::CollateAttributes(vtkXMLDataElement* elem, ostream& os, const char* sep)
230 {
231 if (!elem)
232 {
233 return;
234 }
235
236 int i, nb = elem->GetNumberOfAttributes();
237 for (i = 0; i < nb; i++)
238 {
239 const char* name = elem->GetAttributeName(i);
240 if (name)
241 {
242 const char* value = elem->GetAttribute(name);
243 if (value)
244 {
245 if (i)
246 {
247 os << (sep ? sep : " ");
248 }
249 os << name << "=\"";
250 vtkXMLUtilities::EncodeString(
251 value, elem->GetAttributeEncoding(), os, VTK_ENCODING_UTF_8, 1);
252 os << '\"';
253 }
254 }
255 }
256 }
257
258 //------------------------------------------------------------------------------
FlattenElement(vtkXMLDataElement * elem,ostream & os,vtkIndent * indent,int indent_attributes)259 void vtkXMLUtilities::FlattenElement(
260 vtkXMLDataElement* elem, ostream& os, vtkIndent* indent, int indent_attributes)
261 {
262 if (!elem)
263 {
264 return;
265 }
266
267 unsigned long pos = os.tellp();
268
269 // Name
270
271 if (indent)
272 {
273 os << *indent;
274 }
275
276 os << '<' << elem->GetName();
277
278 // Attributes
279
280 if (elem->GetNumberOfAttributes())
281 {
282 os << ' ';
283 if (indent && indent_attributes)
284 {
285 unsigned long len = (unsigned long)os.tellp() - pos;
286 if (os.fail())
287 {
288 return;
289 }
290 char* sep = new char[1 + len + 1];
291 sep[0] = '\n';
292 memset(sep + 1, ' ', len);
293 sep[len + 1] = '\0';
294 vtkXMLUtilities::CollateAttributes(elem, os, sep);
295 delete[] sep;
296 }
297 else
298 {
299 vtkXMLUtilities::CollateAttributes(elem, os);
300 }
301 }
302
303 const char* cdata = elem->GetCharacterData();
304 int nb_nested = elem->GetNumberOfNestedElements();
305 int need_close_tag = (nb_nested || cdata);
306
307 if (!need_close_tag)
308 {
309 os << "/>";
310 }
311 else
312 {
313 os << '>';
314 }
315
316 // cdata
317
318 if (cdata)
319 {
320 vtkXMLUtilities::EncodeString(cdata, elem->GetAttributeEncoding(), os, VTK_ENCODING_UTF_8, 1);
321 }
322
323 // Nested elements
324
325 if (nb_nested)
326 {
327 if (indent)
328 {
329 os << '\n';
330 }
331 for (int i = 0; i < nb_nested; i++)
332 {
333 if (indent)
334 {
335 vtkIndent next_indent = indent->GetNextIndent();
336 vtkXMLUtilities::FlattenElement(elem->GetNestedElement(i), os, &next_indent);
337 }
338 else
339 {
340 vtkXMLUtilities::FlattenElement(elem->GetNestedElement(i), os);
341 }
342 }
343 if (indent)
344 {
345 os << *indent;
346 }
347 }
348
349 // Close
350
351 if (need_close_tag)
352 {
353 os << "</" << elem->GetName() << '>';
354 }
355
356 if (indent)
357 {
358 os << '\n';
359 }
360 }
361
362 //------------------------------------------------------------------------------
WriteElementToFile(vtkXMLDataElement * elem,const char * filename,vtkIndent * indent)363 int vtkXMLUtilities::WriteElementToFile(
364 vtkXMLDataElement* elem, const char* filename, vtkIndent* indent)
365 {
366 if (!elem || !filename)
367 {
368 return 0;
369 }
370
371 vtksys::ofstream os(filename, ios::out);
372 vtkXMLUtilities::FlattenElement(elem, os, indent);
373
374 os.flush();
375 if (os.fail())
376 {
377 os.close();
378 unlink(filename);
379 return 0;
380 }
381 return 1;
382 }
383
384 //------------------------------------------------------------------------------
ReadElementFromStream(istream & is,int encoding)385 vtkXMLDataElement* vtkXMLUtilities::ReadElementFromStream(istream& is, int encoding)
386 {
387 vtkXMLDataElement* res = nullptr;
388 vtkXMLDataParser* xml_parser = vtkXMLDataParser::New();
389 xml_parser->SetAttributesEncoding(encoding);
390
391 xml_parser->SetStream(&is);
392 if (xml_parser->Parse())
393 {
394 res = xml_parser->GetRootElement();
395 // Bump up the ref count since we are going to delete the parser
396 // which actually owns the element
397 res->SetReferenceCount(res->GetReferenceCount() + 1);
398 vtkXMLUtilities::UnFactorElements(res);
399 }
400
401 xml_parser->Delete();
402 return res;
403 }
404
405 //------------------------------------------------------------------------------
ReadElementFromString(const char * str,int encoding)406 vtkXMLDataElement* vtkXMLUtilities::ReadElementFromString(const char* str, int encoding)
407 {
408 if (!str)
409 {
410 return nullptr;
411 }
412
413 std::stringstream strstr;
414 strstr << str;
415 vtkXMLDataElement* res = vtkXMLUtilities::ReadElementFromStream(strstr, encoding);
416
417 return res;
418 }
419
420 //------------------------------------------------------------------------------
ReadElementFromFile(const char * filename,int encoding)421 vtkXMLDataElement* vtkXMLUtilities::ReadElementFromFile(const char* filename, int encoding)
422 {
423 if (!filename)
424 {
425 return nullptr;
426 }
427
428 vtksys::ifstream is(filename);
429 return vtkXMLUtilities::ReadElementFromStream(is, encoding);
430 }
431
432 //------------------------------------------------------------------------------
ReadElementFromAttributeArray(vtkXMLDataElement * element,const char ** atts,int encoding)433 void vtkXMLUtilities::ReadElementFromAttributeArray(
434 vtkXMLDataElement* element, const char** atts, int encoding)
435 {
436 if (atts)
437 {
438 // If the target encoding is VTK_ENCODING_NONE or VTK_ENCODING_UNKNOWN,
439 // then keep the internal/default encoding, otherwise encode each
440 // attribute using that new format
441
442 if (encoding != VTK_ENCODING_NONE && encoding != VTK_ENCODING_UNKNOWN)
443 {
444 element->SetAttributeEncoding(encoding);
445 }
446
447 // Process each attributes returned by Expat in UTF-8 encoding, and
448 // convert them to our encoding
449
450 for (int i = 0; atts[i] && atts[i + 1]; i += 2)
451 {
452 if (element->GetAttributeEncoding() == VTK_ENCODING_UTF_8)
453 {
454 element->SetAttribute(atts[i], atts[i + 1]);
455 }
456 else
457 {
458 std::ostringstream str;
459 vtkXMLUtilities::EncodeString(
460 atts[i + 1], VTK_ENCODING_UTF_8, str, element->GetAttributeEncoding(), 0);
461 str << ends;
462 element->SetAttribute(atts[i], str.str().c_str());
463 }
464 }
465 }
466 }
467
468 //------------------------------------------------------------------------------
vtkXMLUtilitiesFindSimilarElementsInternal(vtkXMLDataElement * elem,vtkXMLDataElement * tree,vtkXMLUtilitiesDataElementContainer * results)469 static void vtkXMLUtilitiesFindSimilarElementsInternal(
470 vtkXMLDataElement* elem, vtkXMLDataElement* tree, vtkXMLUtilitiesDataElementContainer* results)
471 {
472 if (!elem || !tree || !results || elem == tree)
473 {
474 return;
475 }
476
477 // If the element is equal to the current tree, append it to the
478 // results, otherwise check the sub-trees
479
480 if (elem->IsEqualTo(tree))
481 {
482 results->push_back(tree);
483 }
484 else
485 {
486 for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
487 {
488 vtkXMLUtilitiesFindSimilarElementsInternal(elem, tree->GetNestedElement(i), results);
489 }
490 }
491 }
492
493 //------------------------------------------------------------------------------
FindSimilarElements(vtkXMLDataElement * elem,vtkXMLDataElement * tree,vtkXMLDataElement *** results)494 int vtkXMLUtilities::FindSimilarElements(
495 vtkXMLDataElement* elem, vtkXMLDataElement* tree, vtkXMLDataElement*** results)
496 {
497 *results = nullptr;
498
499 if (!elem || !tree)
500 {
501 return 0;
502 }
503
504 // Create a data element container, and find all similar elements
505
506 vtkXMLUtilitiesDataElementContainer* container = new vtkXMLUtilitiesDataElementContainer;
507
508 vtkXMLUtilitiesFindSimilarElementsInternal(elem, tree, container);
509
510 // If nothing was found, exit now
511
512 int size = (int)container->size();
513 if (size)
514 {
515 // Allocate an array of element and copy the contents of the container
516 // to this flat structure
517
518 *results = new vtkXMLDataElement*[size];
519
520 size = 0;
521 for (vtkXMLUtilitiesDataElementContainer::const_iterator it = container->begin();
522 it != container->end(); ++it)
523 {
524 if (*it)
525 {
526 (*results)[size++] = *it;
527 }
528 }
529 }
530
531 delete container;
532
533 return size;
534 }
535
536 //------------------------------------------------------------------------------
FactorElements(vtkXMLDataElement * tree)537 void vtkXMLUtilities::FactorElements(vtkXMLDataElement* tree)
538 {
539 if (!tree)
540 {
541 return;
542 }
543
544 // Create the factored pool, and add it to the tree so that it can
545 // factor itself too
546
547 vtkXMLDataElement* pool = vtkXMLDataElement::New();
548 pool->SetName(VTK_XML_UTILITIES_FACTORED_POOL_NAME);
549 pool->SetAttributeEncoding(tree->GetAttributeEncoding());
550 tree->AddNestedElement(pool);
551
552 // Factor the tree, as long as some factorization has occurred
553 // (multiple pass might be needed because larger trees are factored
554 // first)
555
556 while (vtkXMLUtilities::FactorElementsInternal(tree, tree, pool))
557 {
558 }
559
560 // Nothing factored, remove the useless pool
561
562 if (!pool->GetNumberOfNestedElements())
563 {
564 tree->RemoveNestedElement(pool);
565 }
566
567 pool->Delete();
568 }
569
570 //------------------------------------------------------------------------------
FactorElementsInternal(vtkXMLDataElement * tree,vtkXMLDataElement * root,vtkXMLDataElement * pool)571 int vtkXMLUtilities::FactorElementsInternal(
572 vtkXMLDataElement* tree, vtkXMLDataElement* root, vtkXMLDataElement* pool)
573 {
574 if (!tree || !root || !pool)
575 {
576 return 0;
577 }
578
579 // Do not bother factoring something already factored
580
581 if (tree->GetName() && !strcmp(tree->GetName(), VTK_XML_UTILITIES_FACTORED_REF_NAME))
582 {
583 return 0;
584 }
585
586 // Try to find all trees similar to the current tree
587
588 vtkXMLDataElement** similar_trees;
589 int nb_of_similar_trees = vtkXMLUtilities::FindSimilarElements(tree, root, &similar_trees);
590
591 // None was found, try to factor the sub-trees
592
593 if (!nb_of_similar_trees)
594 {
595 int res = 0;
596 for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
597 {
598 res += vtkXMLUtilities::FactorElementsInternal(tree->GetNestedElement(i), root, pool);
599 }
600 return res ? 1 : 0;
601 }
602
603 // Otherwise replace those trees with factored refs
604
605 char buffer[5];
606 snprintf(buffer, sizeof(buffer), "%02d_", pool->GetNumberOfNestedElements());
607
608 std::ostringstream id;
609 id << buffer << tree->GetName();
610
611 vtkXMLDataElement* factored = vtkXMLDataElement::New();
612 factored->SetName(VTK_XML_UTILITIES_FACTORED_NAME);
613 factored->SetAttributeEncoding(pool->GetAttributeEncoding());
614 factored->SetAttribute("Id", id.str().c_str());
615 pool->AddNestedElement(factored);
616 factored->Delete();
617
618 vtkXMLDataElement* tree_copy = vtkXMLDataElement::New();
619 tree_copy->DeepCopy(tree);
620 factored->AddNestedElement(tree_copy);
621 tree_copy->Delete();
622
623 for (int i = 0; i < nb_of_similar_trees; i++)
624 {
625 similar_trees[i]->RemoveAllAttributes();
626 similar_trees[i]->RemoveAllNestedElements();
627 similar_trees[i]->SetCharacterData(nullptr, 0);
628 similar_trees[i]->SetName(VTK_XML_UTILITIES_FACTORED_REF_NAME);
629 similar_trees[i]->SetAttribute("Id", id.str().c_str());
630 }
631
632 tree->RemoveAllAttributes();
633 tree->RemoveAllNestedElements();
634 tree->SetCharacterData(nullptr, 0);
635 tree->SetName(VTK_XML_UTILITIES_FACTORED_REF_NAME);
636 tree->SetAttribute("Id", id.str().c_str());
637
638 delete[] similar_trees;
639
640 return 1;
641 }
642
643 //------------------------------------------------------------------------------
UnFactorElements(vtkXMLDataElement * tree)644 void vtkXMLUtilities::UnFactorElements(vtkXMLDataElement* tree)
645 {
646 if (!tree)
647 {
648 return;
649 }
650
651 // Search for the factored pool, if not found, we are done
652
653 vtkXMLDataElement* pool = tree->FindNestedElementWithName(VTK_XML_UTILITIES_FACTORED_POOL_NAME);
654 if (!pool)
655 {
656 return;
657 }
658
659 // Remove the pool from the tree, because it makes no sense
660 // unfactoring it too
661
662 pool->Register(tree);
663 tree->RemoveNestedElement(pool);
664
665 // Unfactor the tree
666
667 vtkXMLUtilities::UnFactorElementsInternal(tree, pool);
668
669 // Remove the useless empty pool
670
671 pool->UnRegister(tree);
672 }
673
674 //------------------------------------------------------------------------------
UnFactorElementsInternal(vtkXMLDataElement * tree,vtkXMLDataElement * pool)675 int vtkXMLUtilities::UnFactorElementsInternal(vtkXMLDataElement* tree, vtkXMLDataElement* pool)
676 {
677 if (!tree || !pool)
678 {
679 return 0;
680 }
681
682 int res = 0;
683
684 // We found a factor, replace it with the corresponding sub-tree
685
686 if (tree->GetName() && !strcmp(tree->GetName(), VTK_XML_UTILITIES_FACTORED_REF_NAME))
687 {
688 vtkXMLDataElement* original_tree = pool->FindNestedElementWithNameAndAttribute(
689 VTK_XML_UTILITIES_FACTORED_NAME, "Id", tree->GetAttribute("Id"));
690 if (original_tree && original_tree->GetNumberOfNestedElements())
691 {
692 tree->DeepCopy(original_tree->GetNestedElement(0));
693 res++;
694 }
695 }
696
697 // Now try to unfactor the sub-trees
698
699 for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
700 {
701 res += vtkXMLUtilities::UnFactorElementsInternal(tree->GetNestedElement(i), pool);
702 }
703
704 return res ? 1 : 0;
705 }
706