1 /*=========================================================================
2
3 Program: Visualization Toolkit
4 Module: vtkXMLUtilities.cxx
5
6 Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
7 All rights reserved.
8 See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9
10 This software is distributed WITHOUT ANY WARRANTY; without even
11 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12 PURPOSE. See the above copyright notice for more information.
13
14 =========================================================================*/
#include "vtkXMLUtilities.h"

#include "vtkObjectFactory.h"
#include "vtkXMLDataElement.h"
#include "vtkXMLDataParser.h"

#if !defined(_WIN32) || defined(__CYGWIN__)
# include <unistd.h> /* unlink */
#else
# include <io.h> /* unlink */
#endif

#include <cstdio>  /* snprintf */
#include <cstring> /* strcmp */
#include <sstream>
#include <string>
#include <vector>
30
31 typedef std::vector<vtkXMLDataElement*> vtkXMLUtilitiesDataElementContainer;
32
33 vtkStandardNewMacro(vtkXMLUtilities);
34
35 #define VTK_XML_UTILITIES_FACTORED_POOL_NAME "FactoredPool"
36 #define VTK_XML_UTILITIES_FACTORED_NAME "Factored"
37 #define VTK_XML_UTILITIES_FACTORED_REF_NAME "FactoredRef"
38
39 //----------------------------------------------------------------------------
// Write the XML predefined entity for character \a c to \a output.
//
// The five characters that have a predefined entity in XML
// (& " ' < >) are replaced by their escaped form. Any other character is
// left untouched and nothing is written.
//
// Returns 1 if an entity was written to \a output, 0 otherwise (in which
// case the caller is responsible for emitting the character itself).
inline int vtkXMLUtilitiesEncodeEntities(unsigned char c, std::ostream &output)
{
  switch (c)
  {
    case '&':
      output << "&amp;";
      return 1;

    case '"':
      output << "&quot;";
      return 1;

    case '\'':
      output << "&apos;";
      return 1;

    case '<':
      output << "&lt;";
      return 1;

    case '>':
      output << "&gt;";
      return 1;
  }

  return 0;
}
67
68 //----------------------------------------------------------------------------
EncodeString(const char * input,int input_encoding,ostream & output,int output_encoding,int special_entities)69 void vtkXMLUtilities::EncodeString(const char *input, int input_encoding,
70 ostream &output, int output_encoding,
71 int special_entities)
72 {
73 // No string
74
75 if (!input)
76 {
77 return;
78 }
79
80 // If either the input or output encoding is not specified,
81 // or they are the same, dump as is (if no entities had to be converted)
82
83 int no_input_encoding = (input_encoding <= VTK_ENCODING_NONE ||
84 input_encoding >= VTK_ENCODING_UNKNOWN);
85
86 int no_output_encoding = (output_encoding <= VTK_ENCODING_NONE ||
87 output_encoding >= VTK_ENCODING_UNKNOWN);
88
89 if (!special_entities &&
90 (no_input_encoding || no_output_encoding ||
91 input_encoding == output_encoding))
92 {
93 output << input;
94 return;
95 }
96
97 // Convert
98
99 const unsigned char *str = (const unsigned char*)input;
100
101 // If either the input or output encoding is not specified, just process
102 // the entities
103
104 if (no_input_encoding || no_output_encoding)
105 {
106 while (*str)
107 {
108 if (!vtkXMLUtilitiesEncodeEntities(*str, output))
109 {
110 output << *str;
111 }
112 str++;
113 }
114 return;
115 }
116
117 // To VTK_UTF_8...
118
119 if (output_encoding == VTK_ENCODING_UTF_8)
120 {
121 int from_iso_8859 = (input_encoding >= VTK_ENCODING_ISO_8859_1 &&
122 input_encoding <= VTK_ENCODING_ISO_8859_16);
123
124 // From ISO-8859 or US-ASCII
125
126 if (input_encoding == VTK_ENCODING_US_ASCII || from_iso_8859)
127 {
128 while (*str)
129 {
130 if (!special_entities || !vtkXMLUtilitiesEncodeEntities(*str, output))
131 {
132 if (*str > 0x7F)
133 {
134 #if 0
135 // This should be the right implementation, but it seems that
136 // it just does not work for Expat. Brad and I should dig into
137 // that later, but it seems weird. In the meantime, just
138 // output the hex representation.
139
140 output << "&#x"
141 << hex << (0xC0 | (*str >> 6))
142 << hex << (0x80 | (*str & 0x3F))
143 << ';';
144 #else
145 output << "&#x" << hex << (int)(*str) << ';';
146 #endif
147 }
148 else if (*str < 30)
149 {
150 output << "&#x" << hex << (int)(*str) << ';';
151 }
152 else
153 {
154 output << *str;
155 }
156 }
157 str++;
158 }
159 }
160
161 // From VTK_ENCODING_UTF_8 (i.e. just encode the entities)
162 // To be completed (need the whole &#x)
163
164 else if (input_encoding == VTK_ENCODING_UTF_8)
165 {
166 while (*str)
167 {
168 if (!vtkXMLUtilitiesEncodeEntities(*str, output))
169 {
170 output << *str;
171 }
172 str++;
173 }
174 }
175
176 // Unsupported input encoding
177
178 else
179 {
180 vtkGenericWarningMacro(
181 << "Input encoding not supported (" << input_encoding << ")");
182 }
183 }
184
185 // From VTK_ENCODING_UTF_8...
186
187 else if (input_encoding == VTK_ENCODING_UTF_8)
188 {
189 int to_iso_8859 = (output_encoding >= VTK_ENCODING_ISO_8859_1 &&
190 output_encoding <=VTK_ENCODING_ISO_8859_16);
191
192 // To US-ASCII or ISO 8859
193
194 if (output_encoding == VTK_ENCODING_US_ASCII || to_iso_8859)
195 {
196 while (*str)
197 {
198 if (!special_entities || !vtkXMLUtilitiesEncodeEntities(*str, output))
199 {
200 // Multi-byte 2-chars converted into one char
201
202 if (*str > 0x7F)
203 {
204 output << (unsigned char)((*str << 6) | (str[1] & 0x3F));
205 str++;
206 }
207 else
208 {
209 output << *str;
210 }
211 }
212 str++;
213 }
214 }
215
216 // Unsupported output encoding
217
218 else
219 {
220 vtkGenericWarningMacro(
221 << "Output encoding not supported (" << input_encoding << ")");
222 }
223 }
224 }
225
226 //----------------------------------------------------------------------------
CollateAttributes(vtkXMLDataElement * elem,ostream & os,const char * sep)227 void vtkXMLUtilities::CollateAttributes(vtkXMLDataElement *elem,
228 ostream &os,
229 const char *sep)
230 {
231 if (!elem)
232 {
233 return;
234 }
235
236 int i, nb = elem->GetNumberOfAttributes();
237 for (i = 0; i < nb; i++)
238 {
239 const char *name = elem->GetAttributeName(i);
240 if (name)
241 {
242 const char *value = elem->GetAttribute(name);
243 if (value)
244 {
245 if (i)
246 {
247 os << (sep ? sep : " ");
248 }
249 os << name << "=\"";
250 vtkXMLUtilities::EncodeString(
251 value, elem->GetAttributeEncoding(), os, VTK_ENCODING_UTF_8, 1);
252 os << '\"';
253 }
254 }
255 }
256 }
257
258 //----------------------------------------------------------------------------
FlattenElement(vtkXMLDataElement * elem,ostream & os,vtkIndent * indent,int indent_attributes)259 void vtkXMLUtilities::FlattenElement(vtkXMLDataElement *elem,
260 ostream &os,
261 vtkIndent *indent,
262 int indent_attributes)
263 {
264 if (!elem)
265 {
266 return;
267 }
268
269 unsigned long pos = os.tellp();
270
271 // Name
272
273 if (indent)
274 {
275 os << *indent;
276 }
277
278 os << '<' << elem->GetName();
279
280 // Attributes
281
282 if (elem->GetNumberOfAttributes())
283 {
284 os << ' ';
285 if (indent && indent_attributes)
286 {
287 unsigned long len = (unsigned long)os.tellp() - pos;
288 if (os.fail())
289 {
290 return;
291 }
292 char *sep = new char [1 + len + 1];
293 sep[0] = '\n';
294 memset(sep + 1, ' ', len);
295 sep[len + 1] = '\0';
296 vtkXMLUtilities::CollateAttributes(elem, os, sep);
297 delete [] sep;
298 }
299 else
300 {
301 vtkXMLUtilities::CollateAttributes(elem, os);
302 }
303 }
304
305 const char *cdata = elem->GetCharacterData();
306 int nb_nested = elem->GetNumberOfNestedElements();
307 int need_close_tag = (nb_nested || cdata);
308
309 if (!need_close_tag)
310 {
311 os << "/>";
312 }
313 else
314 {
315 os << '>';
316 }
317
318 // cdata
319
320 if (cdata)
321 {
322 vtkXMLUtilities::EncodeString(
323 cdata, elem->GetAttributeEncoding(), os, VTK_ENCODING_UTF_8, 1);
324 }
325
326 // Nested elements
327
328 if (nb_nested)
329 {
330 if (indent)
331 {
332 os << '\n';
333 }
334 for (int i = 0; i < nb_nested; i++)
335 {
336 if (indent)
337 {
338 vtkIndent next_indent = indent->GetNextIndent();
339 vtkXMLUtilities::FlattenElement(elem->GetNestedElement(i),
340 os, &next_indent);
341 }
342 else
343 {
344 vtkXMLUtilities::FlattenElement(elem->GetNestedElement(i), os);
345 }
346 }
347 if (indent)
348 {
349 os << *indent;
350 }
351 }
352
353 // Close
354
355 if (need_close_tag)
356 {
357 os << "</" << elem->GetName() << '>';
358 }
359
360 if (indent)
361 {
362 os << '\n';
363 }
364 }
365
366 //----------------------------------------------------------------------------
WriteElementToFile(vtkXMLDataElement * elem,const char * filename,vtkIndent * indent)367 int vtkXMLUtilities::WriteElementToFile(vtkXMLDataElement *elem,
368 const char *filename,
369 vtkIndent *indent)
370 {
371 if (!elem || !filename)
372 {
373 return 0;
374 }
375
376 ofstream os(filename, ios::out);
377 vtkXMLUtilities::FlattenElement(elem, os, indent);
378
379 os.flush();
380 if (os.fail())
381 {
382 os.close();
383 unlink(filename);
384 return 0;
385 }
386 return 1;
387 }
388
389 //----------------------------------------------------------------------------
390 vtkXMLDataElement*
ReadElementFromStream(istream & is,int encoding)391 vtkXMLUtilities::ReadElementFromStream(istream &is, int encoding)
392 {
393 vtkXMLDataElement *res = nullptr;
394 vtkXMLDataParser* xml_parser = vtkXMLDataParser::New();
395 xml_parser->SetAttributesEncoding(encoding);
396
397 xml_parser->SetStream(&is);
398 if (xml_parser->Parse())
399 {
400 res = xml_parser->GetRootElement();
401 // Bump up the ref count since we are going to delete the parser
402 // which actually owns the element
403 res->SetReferenceCount(res->GetReferenceCount() + 1);
404 vtkXMLUtilities::UnFactorElements(res);
405 }
406
407 xml_parser->Delete();
408 return res;
409 }
410
411 //----------------------------------------------------------------------------
412 vtkXMLDataElement*
ReadElementFromString(const char * str,int encoding)413 vtkXMLUtilities::ReadElementFromString(const char *str, int encoding)
414 {
415 if (!str)
416 {
417 return nullptr;
418 }
419
420 std::stringstream strstr;
421 strstr << str;
422 vtkXMLDataElement *res =
423 vtkXMLUtilities::ReadElementFromStream(strstr, encoding);
424
425 return res;
426 }
427
428 //----------------------------------------------------------------------------
429 vtkXMLDataElement*
ReadElementFromFile(const char * filename,int encoding)430 vtkXMLUtilities::ReadElementFromFile(const char *filename, int encoding)
431 {
432 if (!filename)
433 {
434 return nullptr;
435 }
436
437 ifstream is(filename);
438 return vtkXMLUtilities::ReadElementFromStream(is, encoding);
439 }
440
441 //----------------------------------------------------------------------------
ReadElementFromAttributeArray(vtkXMLDataElement * element,const char ** atts,int encoding)442 void vtkXMLUtilities::ReadElementFromAttributeArray(
443 vtkXMLDataElement *element,
444 const char** atts,
445 int encoding)
446 {
447 if(atts)
448 {
449 // If the target encoding is VTK_ENCODING_NONE or VTK_ENCODING_UNKNOWN,
450 // then keep the internal/default encoding, otherwise encode each
451 // attribute using that new format
452
453 if (encoding != VTK_ENCODING_NONE && encoding != VTK_ENCODING_UNKNOWN)
454 {
455 element->SetAttributeEncoding(encoding);
456 }
457
458 // Process each attributes returned by Expat in UTF-8 encoding, and
459 // convert them to our encoding
460
461 for (int i = 0; atts[i] && atts[i + 1]; i += 2)
462 {
463 if (element->GetAttributeEncoding() == VTK_ENCODING_UTF_8)
464 {
465 element->SetAttribute(atts[i], atts[i + 1]);
466 }
467 else
468 {
469 std::ostringstream str;
470 vtkXMLUtilities::EncodeString(
471 atts[i+1], VTK_ENCODING_UTF_8, str, element->GetAttributeEncoding(), 0);
472 str << ends;
473 element->SetAttribute(atts[i], str.str().c_str());
474 }
475 }
476 }
477 }
478
479 //----------------------------------------------------------------------------
vtkXMLUtilitiesFindSimilarElementsInternal(vtkXMLDataElement * elem,vtkXMLDataElement * tree,vtkXMLUtilitiesDataElementContainer * results)480 static void vtkXMLUtilitiesFindSimilarElementsInternal(
481 vtkXMLDataElement *elem,
482 vtkXMLDataElement *tree,
483 vtkXMLUtilitiesDataElementContainer *results)
484 {
485 if (!elem || !tree || !results || elem == tree)
486 {
487 return;
488 }
489
490 // If the element is equal to the current tree, append it to the
491 // results, otherwise check the sub-trees
492
493 if (elem->IsEqualTo(tree))
494 {
495 results->push_back(tree);
496 }
497 else
498 {
499 for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
500 {
501 vtkXMLUtilitiesFindSimilarElementsInternal(
502 elem, tree->GetNestedElement(i), results);
503 }
504 }
505 }
506
507 //----------------------------------------------------------------------------
FindSimilarElements(vtkXMLDataElement * elem,vtkXMLDataElement * tree,vtkXMLDataElement *** results)508 int vtkXMLUtilities::FindSimilarElements(vtkXMLDataElement *elem,
509 vtkXMLDataElement *tree,
510 vtkXMLDataElement ***results)
511 {
512 *results = nullptr;
513
514 if (!elem || ! tree)
515 {
516 return 0;
517 }
518
519 // Create a data element container, and find all similar elements
520
521 vtkXMLUtilitiesDataElementContainer *container =
522 new vtkXMLUtilitiesDataElementContainer;
523
524 vtkXMLUtilitiesFindSimilarElementsInternal(elem, tree, container);
525
526 // If nothing was found, exit now
527
528 int size = (int)container->size();
529 if (size)
530 {
531 // Allocate an array of element and copy the contents of the container
532 // to this flat structure
533
534 *results = new vtkXMLDataElement* [size];
535
536 size = 0;
537 for (vtkXMLUtilitiesDataElementContainer::const_iterator
538 it = container->begin(); it != container->end(); ++it)
539 {
540 if (*it)
541 {
542 (*results)[size++] = *it;
543 }
544 }
545 }
546
547 delete container;
548
549 return size;
550 }
551
552 //----------------------------------------------------------------------------
FactorElements(vtkXMLDataElement * tree)553 void vtkXMLUtilities::FactorElements(vtkXMLDataElement *tree)
554 {
555 if (!tree)
556 {
557 return;
558 }
559
560 // Create the factored pool, and add it to the tree so that it can
561 // factor itself too
562
563 vtkXMLDataElement *pool = vtkXMLDataElement::New();
564 pool->SetName(VTK_XML_UTILITIES_FACTORED_POOL_NAME);
565 pool->SetAttributeEncoding(tree->GetAttributeEncoding());
566 tree->AddNestedElement(pool);
567
568 // Factor the tree, as long as some factorization has occurred
569 // (multiple pass might be needed because larger trees are factored
570 // first)
571
572 while (vtkXMLUtilities::FactorElementsInternal(tree, tree, pool)) {};
573
574 // Nothing factored, remove the useless pool
575
576 if (!pool->GetNumberOfNestedElements())
577 {
578 tree->RemoveNestedElement(pool);
579 }
580
581 pool->Delete();
582 }
583
584 //----------------------------------------------------------------------------
FactorElementsInternal(vtkXMLDataElement * tree,vtkXMLDataElement * root,vtkXMLDataElement * pool)585 int vtkXMLUtilities::FactorElementsInternal(vtkXMLDataElement *tree,
586 vtkXMLDataElement *root,
587 vtkXMLDataElement *pool)
588 {
589 if (!tree || !root || !pool)
590 {
591 return 0;
592 }
593
594 // Do not bother factoring something already factored
595
596 if (tree->GetName() &&
597 !strcmp(tree->GetName(), VTK_XML_UTILITIES_FACTORED_REF_NAME))
598 {
599 return 0;
600 }
601
602 // Try to find all trees similar to the current tree
603
604 vtkXMLDataElement **similar_trees;
605 int nb_of_similar_trees = vtkXMLUtilities::FindSimilarElements(
606 tree, root, &similar_trees);
607
608 // None was found, try to factor the sub-trees
609
610 if (!nb_of_similar_trees)
611 {
612 int res = 0;
613 for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
614 {
615 res += vtkXMLUtilities::FactorElementsInternal(
616 tree->GetNestedElement(i), root, pool);
617 }
618 return res ? 1 : 0;
619 }
620
621 // Otherwise replace those trees with factored refs
622
623 char buffer[5];
624 snprintf(buffer, sizeof(buffer), "%02d_", pool->GetNumberOfNestedElements());
625
626 std::ostringstream id;
627 id << buffer << tree->GetName();
628
629 vtkXMLDataElement *factored = vtkXMLDataElement::New();
630 factored->SetName(VTK_XML_UTILITIES_FACTORED_NAME);
631 factored->SetAttributeEncoding(pool->GetAttributeEncoding());
632 factored->SetAttribute("Id", id.str().c_str());
633 pool->AddNestedElement(factored);
634 factored->Delete();
635
636 vtkXMLDataElement *tree_copy = vtkXMLDataElement::New();
637 tree_copy->DeepCopy(tree);
638 factored->AddNestedElement(tree_copy);
639 tree_copy->Delete();
640
641 for (int i = 0; i < nb_of_similar_trees; i++)
642 {
643 similar_trees[i]->RemoveAllAttributes();
644 similar_trees[i]->RemoveAllNestedElements();
645 similar_trees[i]->SetCharacterData(nullptr, 0);
646 similar_trees[i]->SetName(VTK_XML_UTILITIES_FACTORED_REF_NAME);
647 similar_trees[i]->SetAttribute("Id", id.str().c_str());
648 }
649
650 tree->RemoveAllAttributes();
651 tree->RemoveAllNestedElements();
652 tree->SetCharacterData(nullptr, 0);
653 tree->SetName(VTK_XML_UTILITIES_FACTORED_REF_NAME);
654 tree->SetAttribute("Id", id.str().c_str());
655
656 delete [] similar_trees;
657
658 return 1;
659 }
660
661 //----------------------------------------------------------------------------
UnFactorElements(vtkXMLDataElement * tree)662 void vtkXMLUtilities::UnFactorElements(vtkXMLDataElement *tree)
663 {
664 if (!tree)
665 {
666 return;
667 }
668
669 // Search for the factored pool, if not found, we are done
670
671 vtkXMLDataElement *pool = tree->FindNestedElementWithName(
672 VTK_XML_UTILITIES_FACTORED_POOL_NAME);
673 if (!pool)
674 {
675 return;
676 }
677
678 // Remove the pool from the tree, because it makes no sense
679 // unfactoring it too
680
681 pool->Register(tree);
682 tree->RemoveNestedElement(pool);
683
684 // Unfactor the tree
685
686 vtkXMLUtilities::UnFactorElementsInternal(tree, pool);
687
688 // Remove the useless empty pool
689
690 pool->UnRegister(tree);
691 }
692
693 //----------------------------------------------------------------------------
UnFactorElementsInternal(vtkXMLDataElement * tree,vtkXMLDataElement * pool)694 int vtkXMLUtilities::UnFactorElementsInternal(vtkXMLDataElement *tree,
695 vtkXMLDataElement *pool)
696 {
697 if (!tree || !pool)
698 {
699 return 0;
700 }
701
702 int res = 0;
703
704 // We found a factor, replace it with the corresponding sub-tree
705
706 if (tree->GetName() &&
707 !strcmp(tree->GetName(), VTK_XML_UTILITIES_FACTORED_REF_NAME))
708 {
709 vtkXMLDataElement *original_tree =
710 pool->FindNestedElementWithNameAndAttribute(
711 VTK_XML_UTILITIES_FACTORED_NAME, "Id", tree->GetAttribute("Id"));
712 if (original_tree && original_tree->GetNumberOfNestedElements())
713 {
714 tree->DeepCopy(original_tree->GetNestedElement(0));
715 res++;
716 }
717 }
718
719 // Now try to unfactor the sub-trees
720
721 for (int i = 0; i < tree->GetNumberOfNestedElements(); i++)
722 {
723 res += vtkXMLUtilities::UnFactorElementsInternal(
724 tree->GetNestedElement(i), pool);
725 }
726
727 return res ? 1 : 0;
728 }
729