1 // Copyright (c) 2014-2020 Thomas Fussell
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // @license: http://www.opensource.org/licenses/mit-license.php
22 // @author: see AUTHORS file
23
24 #include <cassert>
25 #include <cctype>
26 #include <numeric> // for std::accumulate
27 #include <sstream>
28 #include <unordered_map>
29
30 #include <xlnt/cell/cell.hpp>
31 #include <xlnt/cell/comment.hpp>
32 #include <xlnt/cell/hyperlink.hpp>
33 #include <xlnt/drawing/spreadsheet_drawing.hpp>
34 #include <xlnt/packaging/manifest.hpp>
35 #include <xlnt/utils/optional.hpp>
36 #include <xlnt/utils/path.hpp>
37 #include <xlnt/workbook/workbook.hpp>
38 #include <xlnt/worksheet/selection.hpp>
39 #include <xlnt/worksheet/worksheet.hpp>
40 #include <detail/constants.hpp>
41 #include <detail/header_footer/header_footer_code.hpp>
42 #include <detail/implementations/workbook_impl.hpp>
43 #include <detail/serialization/custom_value_traits.hpp>
44 #include <detail/serialization/serialisation_helpers.hpp>
45 #include <detail/serialization/vector_streambuf.hpp>
46 #include <detail/serialization/xlsx_consumer.hpp>
47 #include <detail/serialization/zstream.hpp>
48
49 namespace {
/// string_arr_loop_equal
/// Compares the first N - 1 characters of lhs with the string literal rhs
/// (N includes the literal's terminating NUL, which is never compared).
/// No length check is performed here: string_equal verifies
/// lhs.size() == N - 1 before delegating to this function.
template <size_t N>
inline bool string_arr_loop_equal(const std::string &lhs, const char (&rhs)[N])
{
    size_t matched = 0;

    // advance while characters agree and payload characters remain
    while (matched + 1 < N && lhs[matched] == rhs[matched])
    {
        ++matched;
    }

    // true only when all N - 1 payload characters were consumed
    return matched + 1 >= N;
}
65
/// string_equal
/// Equality test between a std::string and a string literal.
/// The literal's length (N - 1) is known at compile time, so a size
/// mismatch is rejected before any character is looked at.
template <size_t N>
inline bool string_equal(const std::string &lhs, const char (&rhs)[N])
{
    // the character loop lives in a separate function to assist with
    // inlining of this size check
    return lhs.size() == N - 1 && string_arr_loop_equal(lhs, rhs);
}
76
qn(const std::string & namespace_,const std::string & name)77 xml::qname &qn(const std::string &namespace_, const std::string &name)
78 {
79 using qname_map = std::unordered_map<std::string, xml::qname>;
80 static auto memo = std::unordered_map<std::string, qname_map>();
81
82 auto &ns_memo = memo[namespace_];
83
84 if (ns_memo.find(name) == ns_memo.end())
85 {
86 return ns_memo.emplace(name, xml::qname(xlnt::constants::ns(namespace_), name)).first->second;
87 }
88
89 return ns_memo[name];
90 }
91
/// <summary>
/// Returns true if bool_string is one of the true spellings of xsd:boolean
/// ("1" or "true"). When THROW_ON_INVALID_XML is defined, any value other
/// than "0", "1", "true" or "false" raises xlnt::exception; otherwise every
/// other value is simply reported as false.
/// </summary>
bool is_true(const std::string &bool_string)
{
    const bool truthy = (bool_string == "1") || (bool_string == "true");

#ifdef THROW_ON_INVALID_XML
    if (!truthy)
    {
        const bool falsy = (bool_string == "0") || (bool_string == "false");

        if (!falsy)
        {
            throw xlnt::exception("xsd:boolean should be one of: 0, 1, true, or false, found " + bool_string);
        }
    }
#endif

    return truthy;
}
114
115 using style_id_pair = std::pair<xlnt::detail::style_impl, std::size_t>;
116
117 /// <summary>
118 /// Try to find given xfid value in the styles vector and, if succeeded, set's the optional style.
119 /// </summary>
set_style_by_xfid(const std::vector<style_id_pair> & styles,std::size_t xfid,xlnt::optional<std::string> & style)120 void set_style_by_xfid(const std::vector<style_id_pair> &styles,
121 std::size_t xfid, xlnt::optional<std::string> &style)
122 {
123 for (auto &item : styles)
124 {
125 if (item.second == xfid)
126 {
127 style = item.first.name;
128 }
129 }
130 }
131
132 // <sheetData> element
133 struct Sheet_Data
134 {
135 std::vector<std::pair<xlnt::row_properties, xlnt::row_t>> parsed_rows;
136 std::vector<xlnt::detail::Cell> parsed_cells;
137 };
138
type_from_string(const std::string & str)139 xlnt::cell_type type_from_string(const std::string &str)
140 {
141 if (string_equal(str, "s"))
142 {
143 return xlnt::cell::type::shared_string;
144 }
145 else if (string_equal(str, "n"))
146 {
147 return xlnt::cell::type::number;
148 }
149 else if (string_equal(str, "b"))
150 {
151 return xlnt::cell::type::boolean;
152 }
153 else if (string_equal(str, "e"))
154 {
155 return xlnt::cell::type::error;
156 }
157 else if (string_equal(str, "inlineStr"))
158 {
159 return xlnt::cell::type::inline_string;
160 }
161 else if (string_equal(str, "str"))
162 {
163 return xlnt::cell::type::formula_string;
164 }
165 return xlnt::cell::type::shared_string;
166 }
167
parse_cell(xlnt::row_t row_arg,xml::parser * parser)168 xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
169 {
170 xlnt::detail::Cell c;
171 for (auto &attr : parser->attribute_map())
172 {
173 if (string_equal(attr.first.name(), "r"))
174 {
175 c.ref = xlnt::detail::Cell_Reference(row_arg, attr.second.value);
176 }
177 else if (string_equal(attr.first.name(), "t"))
178 {
179 c.type = type_from_string(attr.second.value);
180 }
181 else if (string_equal(attr.first.name(), "s"))
182 {
183 c.style_index = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
184 }
185 else if (string_equal(attr.first.name(), "ph"))
186 {
187 c.is_phonetic = is_true(attr.second.value);
188 }
189 else if (string_equal(attr.first.name(), "cm"))
190 {
191 c.cell_metatdata_idx = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
192 }
193 }
194 int level = 1; // nesting level
195 // 1 == <c>
196 // 2 == <v>/<f>
197 // 3 == <is><t>
198 // exit loop at </c>
199 while (level > 0)
200 {
201 xml::parser::event_type e = parser->next();
202 switch (e)
203 {
204 case xml::parser::start_element: {
205 ++level;
206 break;
207 }
208 case xml::parser::end_element: {
209 --level;
210 break;
211 }
212 case xml::parser::characters: {
213 // only want the characters inside one of the nested tags
214 // without this a lot of formatting whitespace can get added
215 if (level == 2)
216 {
217 // <v> -> numeric values
218 if (string_equal(parser->name(), "v"))
219 {
220 c.value += std::move(parser->value());
221 }
222 // <f> formula
223 else if (string_equal(parser->name(), "f"))
224 {
225 c.formula_string += std::move(parser->value());
226 }
227 }
228 else if (level == 3)
229 {
230 // <is><t> -> inline string
231 if (string_equal(parser->name(), "t"))
232 {
233 c.value += std::move(parser->value());
234 }
235 }
236 break;
237 }
238 case xml::parser::start_namespace_decl:
239 case xml::parser::end_namespace_decl:
240 case xml::parser::start_attribute:
241 case xml::parser::end_attribute:
242 case xml::parser::eof:
243 default: {
244 throw xlnt::exception("unexcpected XML parsing event");
245 }
246 }
247 // Prevents unhandled exceptions from being triggered.
248 parser->attribute_map();
249 }
250 return c;
251 }
252
253 // <row> inside <sheetData> element
parse_row(xml::parser * parser,xlnt::detail::number_serialiser & converter,std::vector<xlnt::detail::Cell> & parsed_cells)254 std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<xlnt::detail::Cell> &parsed_cells)
255 {
256 std::pair<xlnt::row_properties, int> props;
257 for (auto &attr : parser->attribute_map())
258 {
259 if (string_equal(attr.first.name(), "dyDescent"))
260 {
261 props.first.dy_descent = converter.deserialise(attr.second.value);
262 }
263 else if (string_equal(attr.first.name(), "spans"))
264 {
265 props.first.spans = attr.second.value;
266 }
267 else if (string_equal(attr.first.name(), "ht"))
268 {
269 props.first.height = converter.deserialise(attr.second.value);
270 }
271 else if (string_equal(attr.first.name(), "s"))
272 {
273 props.first.style = strtoul(attr.second.value.c_str(), nullptr, 10);
274 }
275 else if (string_equal(attr.first.name(), "hidden"))
276 {
277 props.first.hidden = is_true(attr.second.value);
278 }
279 else if (string_equal(attr.first.name(), "customFormat"))
280 {
281 props.first.custom_format = is_true(attr.second.value);
282 }
283 else if (string_equal(attr.first.name(), "ph"))
284 {
285 is_true(attr.second.value);
286 }
287 else if (string_equal(attr.first.name(), "r"))
288 {
289 props.second = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
290 }
291 else if (string_equal(attr.first.name(), "customHeight"))
292 {
293 props.first.custom_height = is_true(attr.second.value.c_str());
294 }
295 }
296
297 int level = 1;
298 while (level > 0)
299 {
300 xml::parser::event_type e = parser->next();
301 switch (e)
302 {
303 case xml::parser::start_element: {
304 parsed_cells.push_back(parse_cell(static_cast<xlnt::row_t>(props.second), parser));
305 break;
306 }
307 case xml::parser::end_element: {
308 --level;
309 break;
310 }
311 case xml::parser::characters: {
312 // ignore whitespace
313 break;
314 }
315 case xml::parser::start_namespace_decl:
316 case xml::parser::start_attribute:
317 case xml::parser::end_namespace_decl:
318 case xml::parser::end_attribute:
319 case xml::parser::eof:
320 default: {
321 throw xlnt::exception("unexcpected XML parsing event");
322 }
323 }
324 }
325 return props;
326 }
327
328 // <sheetData> inside <worksheet> element
parse_sheet_data(xml::parser * parser,xlnt::detail::number_serialiser & converter)329 Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser &converter)
330 {
331 Sheet_Data sheet_data;
332 int level = 1; // nesting level
333 // 1 == <sheetData>
334 // 2 == <row>
335
336 while (level > 0)
337 {
338 xml::parser::event_type e = parser->next();
339 switch (e)
340 {
341 case xml::parser::start_element: {
342 sheet_data.parsed_rows.push_back(parse_row(parser, converter, sheet_data.parsed_cells));
343 break;
344 }
345 case xml::parser::end_element: {
346 --level;
347 break;
348 }
349 case xml::parser::characters: {
350 // ignore, whitespace formatting normally
351 break;
352 }
353 case xml::parser::start_namespace_decl:
354 case xml::parser::start_attribute:
355 case xml::parser::end_namespace_decl:
356 case xml::parser::end_attribute:
357 case xml::parser::eof:
358 default: {
359 throw xlnt::exception("unexcpected XML parsing event");
360 }
361 }
362 }
363 return sheet_data;
364 }
365
366 } // namespace
367
368 /*
369 class parsing_context
370 {
371 public:
372 parsing_context(xlnt::detail::zip_file_reader &archive, const std::string &filename)
373 : parser_(stream_, filename)
374 {
375 }
376
377 xml::parser &parser();
378
379 private:
380 std::istream stream_;
381 xml::parser parser_;
382 };
383 */
384
385 namespace xlnt {
386 namespace detail {
387
xlsx_consumer(workbook & target)388 xlsx_consumer::xlsx_consumer(workbook &target)
389 : target_(target),
390 parser_(nullptr)
391 {
392 }
393
~xlsx_consumer()394 xlsx_consumer::~xlsx_consumer()
395 {
396 }
397
read(std::istream & source)398 void xlsx_consumer::read(std::istream &source)
399 {
400 archive_.reset(new izstream(source));
401 populate_workbook(false);
402 }
403
open(std::istream & source)404 void xlsx_consumer::open(std::istream &source)
405 {
406 archive_.reset(new izstream(source));
407 populate_workbook(true);
408 }
409
read_cell()410 cell xlsx_consumer::read_cell()
411 {
412 return cell(streaming_cell_.get());
413 }
414
read_worksheet(const std::string & rel_id)415 void xlsx_consumer::read_worksheet(const std::string &rel_id)
416 {
417 read_worksheet_begin(rel_id);
418
419 if (!streaming_)
420 {
421 read_worksheet_sheetdata();
422 read_worksheet_end(rel_id);
423 }
424 }
425
read_worksheet_begin(const std::string & rel_id)426 std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
427 {
428 if (streaming_ && streaming_cell_ == nullptr)
429 {
430 streaming_cell_.reset(new detail::cell_impl());
431 }
432
433 auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
434 target_.d_->sheet_title_rel_id_map_.end(),
435 [&](const std::pair<std::string, std::string> &p) {
436 return p.second == rel_id;
437 })->first;
438
439 auto ws = worksheet(current_worksheet_);
440
441 expect_start_element(qn("spreadsheetml", "worksheet"), xml::content::complex); // CT_Worksheet
442 skip_attributes({qn("mc", "Ignorable")});
443
444 while (in_element(qn("spreadsheetml", "worksheet")))
445 {
446 auto current_worksheet_element = expect_start_element(xml::content::complex);
447
448 if (current_worksheet_element == qn("spreadsheetml", "sheetPr")) // CT_SheetPr 0-1
449 {
450 sheet_pr props;
451 if (parser().attribute_present("syncHorizontal"))
452 { // optional, boolean, false
453 props.sync_horizontal.set(parser().attribute<bool>("syncHorizontal"));
454 }
455 if (parser().attribute_present("syncVertical"))
456 { // optional, boolean, false
457 props.sync_vertical.set(parser().attribute<bool>("syncVertical"));
458 }
459 if (parser().attribute_present("syncRef"))
460 { // optional, ST_Ref, false
461 props.sync_ref.set(cell_reference(parser().attribute("syncRef")));
462 }
463 if (parser().attribute_present("transitionEvaluation"))
464 { // optional, boolean, false
465 props.transition_evaluation.set(parser().attribute<bool>("transitionEvaluation"));
466 }
467 if (parser().attribute_present("transitionEntry"))
468 { // optional, boolean, false
469 props.transition_entry.set(parser().attribute<bool>("transitionEntry"));
470 }
471 if (parser().attribute_present("published"))
472 { // optional, boolean, true
473 props.published.set(parser().attribute<bool>("published"));
474 }
475 if (parser().attribute_present("codeName"))
476 { // optional, string
477 props.code_name.set(parser().attribute<std::string>("codeName"));
478 }
479 if (parser().attribute_present("filterMode"))
480 { // optional, boolean, false
481 props.filter_mode.set(parser().attribute<bool>("filterMode"));
482 }
483 if (parser().attribute_present("enableFormatConditionsCalculation"))
484 { // optional, boolean, true
485 props.enable_format_condition_calculation.set(parser().attribute<bool>("enableFormatConditionsCalculation"));
486 }
487 ws.d_->sheet_properties_.set(props);
488 while (in_element(current_worksheet_element))
489 {
490 auto sheet_pr_child_element = expect_start_element(xml::content::simple);
491
492 if (sheet_pr_child_element == qn("spreadsheetml", "tabColor")) // CT_Color 0-1
493 {
494 read_color();
495 }
496 else if (sheet_pr_child_element == qn("spreadsheetml", "outlinePr")) // CT_OutlinePr 0-1
497 {
498 skip_attribute("applyStyles"); // optional, boolean, false
499 skip_attribute("summaryBelow"); // optional, boolean, true
500 skip_attribute("summaryRight"); // optional, boolean, true
501 skip_attribute("showOutlineSymbols"); // optional, boolean, true
502 }
503 else if (sheet_pr_child_element == qn("spreadsheetml", "pageSetUpPr")) // CT_PageSetUpPr 0-1
504 {
505 skip_attribute("autoPageBreaks"); // optional, boolean, true
506 skip_attribute("fitToPage"); // optional, boolean, false
507 }
508 else
509 {
510 unexpected_element(sheet_pr_child_element);
511 }
512
513 expect_end_element(sheet_pr_child_element);
514 }
515 }
516 else if (current_worksheet_element == qn("spreadsheetml", "dimension")) // CT_SheetDimension 0-1
517 {
518 skip_remaining_content(current_worksheet_element);
519 }
520 else if (current_worksheet_element == qn("spreadsheetml", "sheetViews")) // CT_SheetViews 0-1
521 {
522 while (in_element(current_worksheet_element))
523 {
524 expect_start_element(qn("spreadsheetml", "sheetView"), xml::content::complex); // CT_SheetView 1+
525
526 sheet_view new_view;
527 new_view.id(parser().attribute<std::size_t>("workbookViewId"));
528
529 if (parser().attribute_present("showGridLines")) // default="true"
530 {
531 new_view.show_grid_lines(is_true(parser().attribute("showGridLines")));
532 }
533 if (parser().attribute_present("topLeftCell"))
534 {
535 new_view.top_left_cell(cell_reference(parser().attribute("topLeftCell")));
536 }
537
538 if (parser().attribute_present("defaultGridColor")) // default="true"
539 {
540 new_view.default_grid_color(is_true(parser().attribute("defaultGridColor")));
541 }
542
543 if (parser().attribute_present("view")
544 && parser().attribute("view") != "normal")
545 {
546 new_view.type(parser().attribute("view") == "pageBreakPreview"
547 ? sheet_view_type::page_break_preview
548 : sheet_view_type::page_layout);
549 }
550
551 if (parser().attribute_present("tabSelected")
552 && is_true(parser().attribute("tabSelected")))
553 {
554 target_.d_->view_.get().active_tab = ws.id() - 1;
555 }
556
557 skip_attributes({"windowProtection", "showFormulas", "showRowColHeaders", "showZeros", "rightToLeft", "showRuler", "showOutlineSymbols", "showWhiteSpace",
558 "view", "topLeftCell", "colorId", "zoomScale", "zoomScaleNormal", "zoomScaleSheetLayoutView",
559 "zoomScalePageLayoutView"});
560
561 while (in_element(qn("spreadsheetml", "sheetView")))
562 {
563 auto sheet_view_child_element = expect_start_element(xml::content::simple);
564
565 if (sheet_view_child_element == qn("spreadsheetml", "pane")) // CT_Pane 0-1
566 {
567 pane new_pane;
568
569 if (parser().attribute_present("topLeftCell"))
570 {
571 new_pane.top_left_cell = cell_reference(parser().attribute("topLeftCell"));
572 }
573
574 if (parser().attribute_present("xSplit"))
575 {
576 new_pane.x_split = parser().attribute<column_t::index_t>("xSplit");
577 }
578
579 if (parser().attribute_present("ySplit"))
580 {
581 new_pane.y_split = parser().attribute<row_t>("ySplit");
582 }
583
584 if (parser().attribute_present("activePane"))
585 {
586 new_pane.active_pane = parser().attribute<pane_corner>("activePane");
587 }
588
589 if (parser().attribute_present("state"))
590 {
591 new_pane.state = parser().attribute<pane_state>("state");
592 }
593
594 new_view.pane(new_pane);
595 }
596 else if (sheet_view_child_element == qn("spreadsheetml", "selection")) // CT_Selection 0-4
597 {
598 selection current_selection;
599
600 if (parser().attribute_present("activeCell"))
601 {
602 current_selection.active_cell(parser().attribute("activeCell"));
603 }
604
605 if (parser().attribute_present("sqref"))
606 {
607 const auto sqref = range_reference(parser().attribute("sqref"));
608 current_selection.sqref(sqref);
609 }
610
611 if (parser().attribute_present("pane"))
612 {
613 current_selection.pane(parser().attribute<pane_corner>("pane"));
614 }
615
616 new_view.add_selection(current_selection);
617
618 skip_remaining_content(sheet_view_child_element);
619 }
620 else if (sheet_view_child_element == qn("spreadsheetml", "pivotSelection")) // CT_PivotSelection 0-4
621 {
622 skip_remaining_content(sheet_view_child_element);
623 }
624 else if (sheet_view_child_element == qn("spreadsheetml", "extLst")) // CT_ExtensionList 0-1
625 {
626 skip_remaining_content(sheet_view_child_element);
627 }
628 else
629 {
630 unexpected_element(sheet_view_child_element);
631 }
632
633 expect_end_element(sheet_view_child_element);
634 }
635
636 expect_end_element(qn("spreadsheetml", "sheetView"));
637
638 ws.d_->views_.push_back(new_view);
639 }
640 }
641 else if (current_worksheet_element == qn("spreadsheetml", "sheetFormatPr")) // CT_SheetFormatPr 0-1
642 {
643 if (parser().attribute_present("baseColWidth"))
644 {
645 ws.d_->format_properties_.base_col_width =
646 converter_.deserialise(parser().attribute("baseColWidth"));
647 }
648 if (parser().attribute_present("defaultColWidth"))
649 {
650 ws.d_->format_properties_.default_column_width =
651 converter_.deserialise(parser().attribute("defaultColWidth"));
652 }
653 if (parser().attribute_present("defaultRowHeight"))
654 {
655 ws.d_->format_properties_.default_row_height =
656 converter_.deserialise(parser().attribute("defaultRowHeight"));
657 }
658
659 if (parser().attribute_present(qn("x14ac", "dyDescent")))
660 {
661 ws.d_->format_properties_.dy_descent =
662 converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
663 }
664
665 skip_attributes();
666 }
667 else if (current_worksheet_element == qn("spreadsheetml", "cols")) // CT_Cols 0+
668 {
669 while (in_element(qn("spreadsheetml", "cols")))
670 {
671 expect_start_element(qn("spreadsheetml", "col"), xml::content::simple);
672
673 skip_attributes(std::vector<std::string>{"collapsed", "outlineLevel"});
674
675 auto min = static_cast<column_t::index_t>(std::stoull(parser().attribute("min")));
676 auto max = static_cast<column_t::index_t>(std::stoull(parser().attribute("max")));
677
678 // avoid uninitialised warnings in GCC by using a lambda to make the conditional initialisation
679 optional<double> width = [this](xml::parser &p) -> xlnt::optional<double> {
680 if (p.attribute_present("width"))
681 {
682 return (converter_.deserialise(p.attribute("width")) * 7 - 5) / 7;
683 }
684 return xlnt::optional<double>();
685 }(parser());
686 // avoid uninitialised warnings in GCC by using a lambda to make the conditional initialisation
687 optional<std::size_t> column_style = [](xml::parser &p) -> xlnt::optional<std::size_t> {
688 if (p.attribute_present("style"))
689 {
690 return p.attribute<std::size_t>("style");
691 }
692 return xlnt::optional<std::size_t>();
693 }(parser());
694
695 auto custom = parser().attribute_present("customWidth")
696 ? is_true(parser().attribute("customWidth"))
697 : false;
698 auto hidden = parser().attribute_present("hidden")
699 ? is_true(parser().attribute("hidden"))
700 : false;
701 auto best_fit = parser().attribute_present("bestFit")
702 ? is_true(parser().attribute("bestFit"))
703 : false;
704
705 expect_end_element(qn("spreadsheetml", "col"));
706
707 for (auto column = min; column <= max; column++)
708 {
709 column_properties props;
710
711 if (width.is_set())
712 {
713 props.width = width.get();
714 }
715
716 if (column_style.is_set())
717 {
718 props.style = column_style.get();
719 }
720
721 props.hidden = hidden;
722 props.custom_width = custom;
723 props.best_fit = best_fit;
724 ws.add_column_properties(column, props);
725 }
726 }
727 }
728 else if (current_worksheet_element == qn("spreadsheetml", "sheetData")) // CT_SheetData 1
729 {
730 return title;
731 }
732
733 expect_end_element(current_worksheet_element);
734 }
735
736 return title;
737 }
738
read_worksheet_sheetdata()739 void xlsx_consumer::read_worksheet_sheetdata()
740 {
741 if (stack_.back() != qn("spreadsheetml", "sheetData"))
742 {
743 return;
744 }
745 Sheet_Data ws_data = parse_sheet_data(parser_, converter_);
746 // NOTE: parse->construct are seperated here and could easily be threaded
747 // with a SPSC queue for what is likely to be an easy performance win
748 for (auto &row : ws_data.parsed_rows)
749 {
750 current_worksheet_->row_properties_.emplace(row.second, std::move(row.first));
751 }
752 auto impl = detail::cell_impl();
753 for (Cell &cell : ws_data.parsed_cells)
754 {
755 impl.parent_ = current_worksheet_;
756 impl.column_ = cell.ref.column;
757 impl.row_ = cell.ref.row;
758 detail::cell_impl *ws_cell_impl = ¤t_worksheet_->cell_map_.emplace(cell_reference(impl.column_, impl.row_), std::move(impl)).first->second;
759 if (cell.style_index != -1)
760 {
761 ws_cell_impl->format_ = target_.format(static_cast<size_t>(cell.style_index)).d_;
762 }
763 if (cell.cell_metatdata_idx != -1)
764 {
765 }
766 ws_cell_impl->phonetics_visible_ = cell.is_phonetic;
767 if (!cell.formula_string.empty())
768 {
769 ws_cell_impl->formula_ = cell.formula_string[0] == '=' ? cell.formula_string.substr(1) : std::move(cell.formula_string);
770 }
771 if (!cell.value.empty())
772 {
773 ws_cell_impl->type_ = cell.type;
774 switch (cell.type)
775 {
776 case cell::type::boolean: {
777 ws_cell_impl->value_numeric_ = is_true(cell.value) ? 1.0 : 0.0;
778 break;
779 }
780 case cell::type::empty:
781 case cell::type::number:
782 case cell::type::date: {
783 ws_cell_impl->value_numeric_ = converter_.deserialise(cell.value);
784 break;
785 }
786 case cell::type::shared_string: {
787 ws_cell_impl->value_numeric_ = static_cast<double>(strtol(cell.value.c_str(), nullptr, 10));
788 break;
789 }
790 case cell::type::inline_string: {
791 ws_cell_impl->value_text_ = std::move(cell.value);
792 break;
793 }
794 case cell::type::formula_string: {
795 ws_cell_impl->value_text_ = std::move(cell.value);
796 break;
797 }
798 case cell::type::error: {
799 ws_cell_impl->value_text_.plain_text(cell.value, false);
800 break;
801 }
802 }
803 }
804 }
805 stack_.pop_back();
806 }
807
read_worksheet_end(const std::string & rel_id)808 worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id)
809 {
810 auto &manifest = target_.manifest();
811
812 const auto workbook_rel = manifest.relationship(path("/"), relationship_type::office_document);
813 const auto sheet_rel = manifest.relationship(workbook_rel.target().path(), rel_id);
814 path sheet_path(sheet_rel.source().path().parent().append(sheet_rel.target().path()));
815 auto hyperlinks = manifest.relationships(sheet_path, xlnt::relationship_type::hyperlink);
816
817 auto ws = worksheet(current_worksheet_);
818
819 while (in_element(qn("spreadsheetml", "worksheet")))
820 {
821 auto current_worksheet_element = expect_start_element(xml::content::complex);
822
823 if (current_worksheet_element == qn("spreadsheetml", "sheetCalcPr")) // CT_SheetCalcPr 0-1
824 {
825 skip_remaining_content(current_worksheet_element);
826 }
827 else if (current_worksheet_element == qn("spreadsheetml", "sheetProtection")) // CT_SheetProtection 0-1
828 {
829 skip_remaining_content(current_worksheet_element);
830 }
831 else if (current_worksheet_element == qn("spreadsheetml", "protectedRanges")) // CT_ProtectedRanges 0-1
832 {
833 skip_remaining_content(current_worksheet_element);
834 }
835 else if (current_worksheet_element == qn("spreadsheetml", "scenarios")) // CT_Scenarios 0-1
836 {
837 skip_remaining_content(current_worksheet_element);
838 }
839 else if (current_worksheet_element == qn("spreadsheetml", "autoFilter")) // CT_AutoFilter 0-1
840 {
841 ws.auto_filter(xlnt::range_reference(parser().attribute("ref")));
842 // auto filter complex
843 skip_remaining_content(current_worksheet_element);
844 }
845 else if (current_worksheet_element == qn("spreadsheetml", "sortState")) // CT_SortState 0-1
846 {
847 skip_remaining_content(current_worksheet_element);
848 }
849 else if (current_worksheet_element == qn("spreadsheetml", "dataConsolidate")) // CT_DataConsolidate 0-1
850 {
851 skip_remaining_content(current_worksheet_element);
852 }
853 else if (current_worksheet_element == qn("spreadsheetml", "customSheetViews")) // CT_CustomSheetViews 0-1
854 {
855 skip_remaining_content(current_worksheet_element);
856 }
857 else if (current_worksheet_element == qn("spreadsheetml", "mergeCells")) // CT_MergeCells 0-1
858 {
859 auto count = std::stoull(parser().attribute("count"));
860
861 while (in_element(qn("spreadsheetml", "mergeCells")))
862 {
863 expect_start_element(qn("spreadsheetml", "mergeCell"), xml::content::simple);
864 ws.merge_cells(range_reference(parser().attribute("ref")));
865 expect_end_element(qn("spreadsheetml", "mergeCell"));
866
867 count--;
868 }
869
870 if (count != 0)
871 {
872 throw invalid_file("sizes don't match");
873 }
874 }
875 else if (current_worksheet_element == qn("spreadsheetml", "phoneticPr")) // CT_PhoneticPr 0-1
876 {
877 phonetic_pr phonetic_properties(parser().attribute<std::uint32_t>("fontId"));
878 if (parser().attribute_present("type"))
879 {
880 phonetic_properties.type(phonetic_pr::type_from_string(parser().attribute("type")));
881 }
882 if (parser().attribute_present("alignment"))
883 {
884 phonetic_properties.alignment(phonetic_pr::alignment_from_string(parser().attribute("alignment")));
885 }
886 current_worksheet_->phonetic_properties_.set(phonetic_properties);
887 }
888 else if (current_worksheet_element == qn("spreadsheetml", "conditionalFormatting")) // CT_ConditionalFormatting 0+
889 {
890 skip_remaining_content(current_worksheet_element);
891 }
892 else if (current_worksheet_element == qn("spreadsheetml", "dataValidations")) // CT_DataValidations 0-1
893 {
894 skip_remaining_content(current_worksheet_element);
895 }
896 else if (current_worksheet_element == qn("spreadsheetml", "hyperlinks")) // CT_Hyperlinks 0-1
897 {
898 while (in_element(current_worksheet_element))
899 {
900 // CT_Hyperlink
901 expect_start_element(qn("spreadsheetml", "hyperlink"), xml::content::simple);
902
903 auto cell = ws.cell(parser().attribute("ref"));
904
905 if (parser().attribute_present(qn("r", "id")))
906 {
907 auto hyperlink_rel_id = parser().attribute(qn("r", "id"));
908 auto hyperlink_rel = std::find_if(hyperlinks.begin(), hyperlinks.end(),
909 [&](const relationship &r) { return r.id() == hyperlink_rel_id; });
910
911 if (hyperlink_rel != hyperlinks.end())
912 {
913 auto url = hyperlink_rel->target().path().string();
914
915 if (cell.has_value())
916 {
917 cell.hyperlink(url, cell.value<std::string>());
918 }
919 else
920 {
921 cell.hyperlink(url);
922 }
923 }
924 }
925 else if (parser().attribute_present("location"))
926 {
927 auto hyperlink = hyperlink_impl();
928
929 auto location = parser().attribute("location");
930 hyperlink.relationship = relationship("", relationship_type::hyperlink,
931 uri(""), uri(location), target_mode::internal);
932
933 if (parser().attribute_present("display"))
934 {
935 hyperlink.display = parser().attribute("display");
936 }
937
938 if (parser().attribute_present("tooltip"))
939 {
940 hyperlink.tooltip = parser().attribute("tooltip");
941 }
942
943 cell.d_->hyperlink_ = hyperlink;
944 }
945
946 expect_end_element(qn("spreadsheetml", "hyperlink"));
947 }
948 }
949 else if (current_worksheet_element == qn("spreadsheetml", "printOptions")) // CT_PrintOptions 0-1
950 {
951 print_options opts;
952 if (parser().attribute_present("gridLines"))
953 {
954 opts.print_grid_lines.set(parser().attribute<bool>("gridLines"));
955 }
956 if (parser().attribute_present("gridLinesSet"))
957 {
958 opts.print_grid_lines.set(parser().attribute<bool>("gridLinesSet"));
959 }
960 if (parser().attribute_present("headings"))
961 {
962 opts.print_grid_lines.set(parser().attribute<bool>("headings"));
963 }
964 if (parser().attribute_present("horizontalCentered"))
965 {
966 opts.print_grid_lines.set(parser().attribute<bool>("horizontalCentered"));
967 }
968 if (parser().attribute_present("verticalCentered"))
969 {
970 opts.print_grid_lines.set(parser().attribute<bool>("verticalCentered"));
971 }
972 ws.d_->print_options_.set(opts);
973 skip_remaining_content(current_worksheet_element);
974 }
975 else if (current_worksheet_element == qn("spreadsheetml", "pageMargins")) // CT_PageMargins 0-1
976 {
977 page_margins margins;
978
979 margins.top(converter_.deserialise(parser().attribute("top")));
980 margins.bottom(converter_.deserialise(parser().attribute("bottom")));
981 margins.left(converter_.deserialise(parser().attribute("left")));
982 margins.right(converter_.deserialise(parser().attribute("right")));
983 margins.header(converter_.deserialise(parser().attribute("header")));
984 margins.footer(converter_.deserialise(parser().attribute("footer")));
985
986 ws.page_margins(margins);
987 }
988 else if (current_worksheet_element == qn("spreadsheetml", "pageSetup")) // CT_PageSetup 0-1
989 {
990 page_setup setup;
991 if (parser().attribute_present("orientation"))
992 {
993 setup.orientation_.set(parser().attribute<orientation>("orientation"));
994 }
995 if (parser().attribute_present("horizontalDpi"))
996 {
997 setup.horizontal_dpi_.set(parser().attribute<std::size_t>("horizontalDpi"));
998 }
999 if (parser().attribute_present("verticalDpi"))
1000 {
1001 setup.vertical_dpi_.set(parser().attribute<std::size_t>("verticalDpi"));
1002 }
1003 ws.page_setup(setup);
1004 skip_remaining_content(current_worksheet_element);
1005 }
1006 else if (current_worksheet_element == qn("spreadsheetml", "headerFooter")) // CT_HeaderFooter 0-1
1007 {
1008 header_footer hf;
1009
1010 hf.align_with_margins(!parser().attribute_present("alignWithMargins")
1011 || is_true(parser().attribute("alignWithMargins")));
1012 hf.scale_with_doc(!parser().attribute_present("alignWithMargins")
1013 || is_true(parser().attribute("alignWithMargins")));
1014 auto different_odd_even = parser().attribute_present("differentOddEven")
1015 && is_true(parser().attribute("differentOddEven"));
1016 auto different_first = parser().attribute_present("differentFirst")
1017 && is_true(parser().attribute("differentFirst"));
1018
1019 optional<std::array<optional<rich_text>, 3>> odd_header;
1020 optional<std::array<optional<rich_text>, 3>> odd_footer;
1021 optional<std::array<optional<rich_text>, 3>> even_header;
1022 optional<std::array<optional<rich_text>, 3>> even_footer;
1023 optional<std::array<optional<rich_text>, 3>> first_header;
1024 optional<std::array<optional<rich_text>, 3>> first_footer;
1025
1026 using xlnt::detail::decode_header_footer;
1027
1028 while (in_element(current_worksheet_element))
1029 {
1030 auto current_hf_element = expect_start_element(xml::content::simple);
1031
1032 if (current_hf_element == qn("spreadsheetml", "oddHeader"))
1033 {
1034 odd_header = decode_header_footer(read_text(), converter_);
1035 }
1036 else if (current_hf_element == qn("spreadsheetml", "oddFooter"))
1037 {
1038 odd_footer = decode_header_footer(read_text(), converter_);
1039 }
1040 else if (current_hf_element == qn("spreadsheetml", "evenHeader"))
1041 {
1042 even_header = decode_header_footer(read_text(), converter_);
1043 }
1044 else if (current_hf_element == qn("spreadsheetml", "evenFooter"))
1045 {
1046 even_footer = decode_header_footer(read_text(), converter_);
1047 }
1048 else if (current_hf_element == qn("spreadsheetml", "firstHeader"))
1049 {
1050 first_header = decode_header_footer(read_text(), converter_);
1051 }
1052 else if (current_hf_element == qn("spreadsheetml", "firstFooter"))
1053 {
1054 first_footer = decode_header_footer(read_text(), converter_);
1055 }
1056 else
1057 {
1058 unexpected_element(current_hf_element);
1059 }
1060
1061 expect_end_element(current_hf_element);
1062 }
1063
1064 for (std::size_t i = 0; i < 3; ++i)
1065 {
1066 auto loc = i == 0 ? header_footer::location::left
1067 : i == 1 ? header_footer::location::center : header_footer::location::right;
1068
1069 if (different_odd_even)
1070 {
1071 if (odd_header.is_set()
1072 && odd_header.get().at(i).is_set()
1073 && even_header.is_set()
1074 && even_header.get().at(i).is_set())
1075 {
1076 hf.odd_even_header(loc, odd_header.get().at(i).get(), even_header.get().at(i).get());
1077 }
1078
1079 if (odd_footer.is_set()
1080 && odd_footer.get().at(i).is_set()
1081 && even_footer.is_set()
1082 && even_footer.get().at(i).is_set())
1083 {
1084 hf.odd_even_footer(loc, odd_footer.get().at(i).get(), even_footer.get().at(i).get());
1085 }
1086 }
1087 else
1088 {
1089 if (odd_header.is_set() && odd_header.get().at(i).is_set())
1090 {
1091 hf.header(loc, odd_header.get().at(i).get());
1092 }
1093
1094 if (odd_footer.is_set() && odd_footer.get().at(i).is_set())
1095 {
1096 hf.footer(loc, odd_footer.get().at(i).get());
1097 }
1098 }
1099
1100 if (different_first)
1101 {
1102 }
1103 }
1104
1105 ws.header_footer(hf);
1106 }
1107 else if (current_worksheet_element == qn("spreadsheetml", "rowBreaks")) // CT_PageBreak 0-1
1108 {
1109 auto count = parser().attribute_present("count") ? parser().attribute<std::size_t>("count") : 0;
1110 auto manual_break_count = parser().attribute_present("manualBreakCount")
1111 ? parser().attribute<std::size_t>("manualBreakCount")
1112 : 0;
1113
1114 while (in_element(qn("spreadsheetml", "rowBreaks")))
1115 {
1116 expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
1117
1118 if (parser().attribute_present("id"))
1119 {
1120 ws.page_break_at_row(parser().attribute<row_t>("id"));
1121 --count;
1122 }
1123
1124 if (parser().attribute_present("man") && is_true(parser().attribute("man")))
1125 {
1126 --manual_break_count;
1127 }
1128
1129 skip_attributes({"min", "max", "pt"});
1130 expect_end_element(qn("spreadsheetml", "brk"));
1131 }
1132 }
1133 else if (current_worksheet_element == qn("spreadsheetml", "colBreaks")) // CT_PageBreak 0-1
1134 {
1135 auto count = parser().attribute_present("count") ? parser().attribute<std::size_t>("count") : 0;
1136 auto manual_break_count = parser().attribute_present("manualBreakCount")
1137 ? parser().attribute<std::size_t>("manualBreakCount")
1138 : 0;
1139
1140 while (in_element(qn("spreadsheetml", "colBreaks")))
1141 {
1142 expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
1143
1144 if (parser().attribute_present("id"))
1145 {
1146 ws.page_break_at_column(parser().attribute<column_t::index_t>("id"));
1147 --count;
1148 }
1149
1150 if (parser().attribute_present("man") && is_true(parser().attribute("man")))
1151 {
1152 --manual_break_count;
1153 }
1154
1155 skip_attributes({"min", "max", "pt"});
1156 expect_end_element(qn("spreadsheetml", "brk"));
1157 }
1158 }
1159 else if (current_worksheet_element == qn("spreadsheetml", "customProperties")) // CT_CustomProperties 0-1
1160 {
1161 skip_remaining_content(current_worksheet_element);
1162 }
1163 else if (current_worksheet_element == qn("spreadsheetml", "cellWatches")) // CT_CellWatches 0-1
1164 {
1165 skip_remaining_content(current_worksheet_element);
1166 }
1167 else if (current_worksheet_element == qn("spreadsheetml", "ignoredErrors")) // CT_IgnoredErrors 0-1
1168 {
1169 skip_remaining_content(current_worksheet_element);
1170 }
1171 else if (current_worksheet_element == qn("spreadsheetml", "smartTags")) // CT_SmartTags 0-1
1172 {
1173 skip_remaining_content(current_worksheet_element);
1174 }
1175 else if (current_worksheet_element == qn("spreadsheetml", "drawing")) // CT_Drawing 0-1
1176 {
1177 if (parser().attribute_present(qn("r", "id")))
1178 {
1179 auto drawing_rel_id = parser().attribute(qn("r", "id"));
1180 ws.d_->drawing_rel_id_ = drawing_rel_id;
1181 }
1182 }
1183 else if (current_worksheet_element == qn("spreadsheetml", "legacyDrawing"))
1184 {
1185 skip_remaining_content(current_worksheet_element);
1186 }
1187 else if (current_worksheet_element == qn("spreadsheetml", "extLst"))
1188 {
1189 ext_list extensions(parser(), current_worksheet_element.namespace_());
1190 ws.d_->extension_list_.set(extensions);
1191 }
1192 else
1193 {
1194 unexpected_element(current_worksheet_element);
1195 }
1196
1197 expect_end_element(current_worksheet_element);
1198 }
1199
1200 expect_end_element(qn("spreadsheetml", "worksheet"));
1201
1202 if (manifest.has_relationship(sheet_path, xlnt::relationship_type::comments))
1203 {
1204 auto comments_part = manifest.canonicalize({workbook_rel, sheet_rel,
1205 manifest.relationship(sheet_path, xlnt::relationship_type::comments)});
1206
1207 auto receive = xml::parser::receive_default;
1208 auto comments_part_streambuf = archive_->open(comments_part);
1209 std::istream comments_part_stream(comments_part_streambuf.get());
1210 xml::parser parser(comments_part_stream, comments_part.string(), receive);
1211 parser_ = &parser;
1212
1213 read_comments(ws);
1214
1215 if (manifest.has_relationship(sheet_path, xlnt::relationship_type::vml_drawing))
1216 {
1217 auto vml_drawings_part = manifest.canonicalize({workbook_rel, sheet_rel,
1218 manifest.relationship(sheet_path, xlnt::relationship_type::vml_drawing)});
1219
1220 auto vml_drawings_part_streambuf = archive_->open(comments_part);
1221 std::istream vml_drawings_part_stream(comments_part_streambuf.get());
1222 xml::parser vml_parser(vml_drawings_part_stream, vml_drawings_part.string(), receive);
1223 parser_ = &vml_parser;
1224
1225 read_vml_drawings(ws);
1226 }
1227 }
1228
1229 if (manifest.has_relationship(sheet_path, xlnt::relationship_type::drawings))
1230 {
1231 auto drawings_part = manifest.canonicalize({workbook_rel, sheet_rel,
1232 manifest.relationship(sheet_path, xlnt::relationship_type::drawings)});
1233
1234 auto receive = xml::parser::receive_default;
1235 auto drawings_part_streambuf = archive_->open(drawings_part);
1236 std::istream drawings_part_stream(drawings_part_streambuf.get());
1237 xml::parser parser(drawings_part_stream, drawings_part.string(), receive);
1238 parser_ = &parser;
1239
1240 read_drawings(ws, drawings_part);
1241 }
1242
1243 return ws;
1244 }
1245
parser()1246 xml::parser &xlsx_consumer::parser()
1247 {
1248 return *parser_;
1249 }
1250
has_cell()1251 bool xlsx_consumer::has_cell()
1252 {
1253 auto ws = worksheet(current_worksheet_);
1254
1255 while (streaming_cell_ // we're not at the end of the file
1256 && !in_element(qn("spreadsheetml", "row"))) // we're at the end of a row, or between rows
1257 {
1258 if (parser().peek() == xml::parser::event_type::end_element
1259 && stack_.back() == qn("spreadsheetml", "row"))
1260 {
1261 // We're at the end of a row.
1262 expect_end_element(qn("spreadsheetml", "row"));
1263 // ... and keep parsing.
1264 }
1265
1266 if (parser().peek() == xml::parser::event_type::end_element
1267 && stack_.back() == qn("spreadsheetml", "sheetData"))
1268 {
1269 // End of sheet. Mark it by setting streaming_cell_ to nullptr, so we never get here again.
1270 expect_end_element(qn("spreadsheetml", "sheetData"));
1271 streaming_cell_.reset(nullptr);
1272 break;
1273 }
1274
1275 expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
1276 auto row_index = static_cast<row_t>(std::stoul(parser().attribute("r")));
1277 auto &row_properties = ws.row_properties(row_index);
1278
1279 if (parser().attribute_present("ht"))
1280 {
1281 row_properties.height = converter_.deserialise(parser().attribute("ht"));
1282 }
1283
1284 if (parser().attribute_present("customHeight"))
1285 {
1286 row_properties.custom_height = is_true(parser().attribute("customHeight"));
1287 }
1288
1289 if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
1290 {
1291 row_properties.hidden = true;
1292 }
1293
1294 if (parser().attribute_present(qn("x14ac", "dyDescent")))
1295 {
1296 row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
1297 }
1298
1299 if (parser().attribute_present("spans"))
1300 {
1301 row_properties.spans = parser().attribute("spans");
1302 }
1303
1304 skip_attributes({"customFormat", "s", "customFont",
1305 "outlineLevel", "collapsed", "thickTop", "thickBot",
1306 "ph"});
1307 }
1308
1309 if (!streaming_cell_)
1310 {
1311 // We're at the end of the worksheet
1312 return false;
1313 }
1314
1315 expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
1316
1317 assert(streaming_);
1318 auto cell = xlnt::cell(streaming_cell_.get());
1319 auto reference = cell_reference(parser().attribute("r"));
1320 cell.d_->parent_ = current_worksheet_;
1321 cell.d_->column_ = reference.column_index();
1322 cell.d_->row_ = reference.row();
1323
1324 if (parser().attribute_present("ph"))
1325 {
1326 cell.d_->phonetics_visible_ = parser().attribute<bool>("ph");
1327 }
1328
1329 auto has_type = parser().attribute_present("t");
1330 auto type = has_type ? parser().attribute("t") : "n";
1331
1332 if (parser().attribute_present("s"))
1333 {
1334 cell.format(target_.format(static_cast<std::size_t>(std::stoull(parser().attribute("s")))));
1335 }
1336
1337 auto has_value = false;
1338 auto value_string = std::string();
1339
1340 auto has_formula = false;
1341 auto has_shared_formula = false;
1342 auto formula_value_string = std::string();
1343
1344 while (in_element(qn("spreadsheetml", "c")))
1345 {
1346 auto current_element = expect_start_element(xml::content::mixed);
1347
1348 if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
1349 {
1350 has_value = true;
1351 value_string = read_text();
1352 }
1353 else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
1354 {
1355 has_formula = true;
1356
1357 if (parser().attribute_present("t"))
1358 {
1359 has_shared_formula = parser().attribute("t") == "shared";
1360 }
1361
1362 skip_attributes({"aca", "ref", "dt2D", "dtr", "del1",
1363 "del2", "r1", "r2", "ca", "si", "bx"});
1364
1365 formula_value_string = read_text();
1366 }
1367 else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
1368 {
1369 expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
1370 has_value = true;
1371 value_string = read_text();
1372 expect_end_element(qn("spreadsheetml", "t"));
1373 }
1374 else
1375 {
1376 unexpected_element(current_element);
1377 }
1378
1379 expect_end_element(current_element);
1380 }
1381
1382 expect_end_element(qn("spreadsheetml", "c"));
1383
1384 if (has_formula && !has_shared_formula)
1385 {
1386 cell.formula(formula_value_string);
1387 }
1388
1389 if (has_value)
1390 {
1391 if (type == "str")
1392 {
1393 cell.d_->value_text_ = value_string;
1394 cell.data_type(cell::type::formula_string);
1395 }
1396 else if (type == "inlineStr")
1397 {
1398 cell.d_->value_text_ = value_string;
1399 cell.data_type(cell::type::inline_string);
1400 }
1401 else if (type == "s")
1402 {
1403 cell.d_->value_numeric_ = converter_.deserialise(value_string);
1404 cell.data_type(cell::type::shared_string);
1405 }
1406 else if (type == "b") // boolean
1407 {
1408 cell.value(is_true(value_string));
1409 }
1410 else if (type == "n") // numeric
1411 {
1412 cell.value(converter_.deserialise(value_string));
1413 }
1414 else if (!value_string.empty() && value_string[0] == '#')
1415 {
1416 cell.error(value_string);
1417 }
1418 }
1419
1420 return true;
1421 }
1422
read_relationships(const path & part)1423 std::vector<relationship> xlsx_consumer::read_relationships(const path &part)
1424 {
1425 const auto part_rels_path = part.parent().append("_rels").append(part.filename() + ".rels").relative_to(path("/"));
1426
1427 std::vector<xlnt::relationship> relationships;
1428 if (!archive_->has_file(part_rels_path)) return relationships;
1429
1430 auto rels_streambuf = archive_->open(part_rels_path);
1431 std::istream rels_stream(rels_streambuf.get());
1432 xml::parser parser(rels_stream, part_rels_path.string());
1433 parser_ = &parser;
1434
1435 expect_start_element(qn("relationships", "Relationships"), xml::content::complex);
1436
1437 while (in_element(qn("relationships", "Relationships")))
1438 {
1439 expect_start_element(qn("relationships", "Relationship"), xml::content::simple);
1440
1441 const auto target_mode = parser.attribute_present("TargetMode")
1442 ? parser.attribute<xlnt::target_mode>("TargetMode")
1443 : xlnt::target_mode::internal;
1444 auto target = xlnt::uri(parser.attribute("Target"));
1445
1446 if (target.path().is_absolute() && target_mode == xlnt::target_mode::internal)
1447 {
1448 target = uri(target.path().relative_to(path(part.string()).resolve(path("/"))).string());
1449 }
1450
1451 relationships.emplace_back(parser.attribute("Id"),
1452 parser.attribute<xlnt::relationship_type>("Type"),
1453 xlnt::uri(part.string()), target, target_mode);
1454
1455 expect_end_element(qn("relationships", "Relationship"));
1456 }
1457
1458 expect_end_element(qn("relationships", "Relationships"));
1459 parser_ = nullptr;
1460
1461 return relationships;
1462 }
1463
read_part(const std::vector<relationship> & rel_chain)1464 void xlsx_consumer::read_part(const std::vector<relationship> &rel_chain)
1465 {
1466 const auto &manifest = target_.manifest();
1467 const auto part_path = manifest.canonicalize(rel_chain);
1468 auto part_streambuf = archive_->open(part_path);
1469 std::istream part_stream(part_streambuf.get());
1470 xml::parser parser(part_stream, part_path.string());
1471 parser_ = &parser;
1472
1473 switch (rel_chain.back().type())
1474 {
1475 case relationship_type::core_properties:
1476 read_core_properties();
1477 break;
1478
1479 case relationship_type::extended_properties:
1480 read_extended_properties();
1481 break;
1482
1483 case relationship_type::custom_properties:
1484 read_custom_properties();
1485 break;
1486
1487 case relationship_type::office_document:
1488 read_office_document(manifest.content_type(part_path));
1489 break;
1490
1491 case relationship_type::connections:
1492 read_connections();
1493 break;
1494
1495 case relationship_type::custom_xml_mappings:
1496 read_custom_xml_mappings();
1497 break;
1498
1499 case relationship_type::external_workbook_references:
1500 read_external_workbook_references();
1501 break;
1502
1503 case relationship_type::pivot_table:
1504 read_pivot_table();
1505 break;
1506
1507 case relationship_type::shared_workbook_revision_headers:
1508 read_shared_workbook_revision_headers();
1509 break;
1510
1511 case relationship_type::volatile_dependencies:
1512 read_volatile_dependencies();
1513 break;
1514
1515 case relationship_type::shared_string_table:
1516 read_shared_string_table();
1517 break;
1518
1519 case relationship_type::stylesheet:
1520 read_stylesheet();
1521 break;
1522
1523 case relationship_type::theme:
1524 read_theme();
1525 break;
1526
1527 case relationship_type::chartsheet:
1528 read_chartsheet(rel_chain.back().id());
1529 break;
1530
1531 case relationship_type::dialogsheet:
1532 read_dialogsheet(rel_chain.back().id());
1533 break;
1534
1535 case relationship_type::worksheet:
1536 read_worksheet(rel_chain.back().id());
1537 break;
1538
1539 case relationship_type::thumbnail:
1540 read_image(part_path);
1541 break;
1542
1543 case relationship_type::calculation_chain:
1544 read_calculation_chain();
1545 break;
1546
1547 case relationship_type::hyperlink:
1548 break;
1549
1550 case relationship_type::comments:
1551 break;
1552
1553 case relationship_type::vml_drawing:
1554 break;
1555
1556 case relationship_type::unknown:
1557 break;
1558
1559 case relationship_type::printer_settings:
1560 break;
1561
1562 case relationship_type::custom_property:
1563 break;
1564
1565 case relationship_type::drawings:
1566 break;
1567
1568 case relationship_type::pivot_table_cache_definition:
1569 break;
1570
1571 case relationship_type::pivot_table_cache_records:
1572 break;
1573
1574 case relationship_type::query_table:
1575 break;
1576
1577 case relationship_type::shared_workbook:
1578 break;
1579
1580 case relationship_type::revision_log:
1581 break;
1582
1583 case relationship_type::shared_workbook_user_data:
1584 break;
1585
1586 case relationship_type::single_cell_table_definitions:
1587 break;
1588
1589 case relationship_type::table_definition:
1590 break;
1591
1592 case relationship_type::image:
1593 read_image(part_path);
1594 break;
1595 }
1596
1597 parser_ = nullptr;
1598 }
1599
populate_workbook(bool streaming)1600 void xlsx_consumer::populate_workbook(bool streaming)
1601 {
1602 streaming_ = streaming;
1603
1604 target_.clear();
1605
1606 read_content_types();
1607 const auto root_path = path("/");
1608
1609 for (const auto &package_rel : read_relationships(root_path))
1610 {
1611 manifest().register_relationship(package_rel);
1612 }
1613
1614 for (auto package_rel : manifest().relationships(root_path))
1615 {
1616 if (package_rel.type() == relationship_type::office_document)
1617 {
1618 // Read the workbook after all the other package parts
1619 continue;
1620 }
1621
1622 read_part({package_rel});
1623 }
1624
1625 for (const auto &relationship_source_string : archive_->files())
1626 {
1627 for (const auto &part_rel : read_relationships(path(relationship_source_string)))
1628 {
1629 manifest().register_relationship(part_rel);
1630 }
1631 }
1632
1633 read_part({manifest().relationship(root_path,
1634 relationship_type::office_document)});
1635 }
1636
1637 // Package Parts
1638
read_content_types()1639 void xlsx_consumer::read_content_types()
1640 {
1641 auto &manifest = target_.manifest();
1642 auto content_types_streambuf = archive_->open(path("[Content_Types].xml"));
1643 std::istream content_types_stream(content_types_streambuf.get());
1644 xml::parser parser(content_types_stream, "[Content_Types].xml");
1645 parser_ = &parser;
1646
1647 expect_start_element(qn("content-types", "Types"), xml::content::complex);
1648
1649 while (in_element(qn("content-types", "Types")))
1650 {
1651 auto current_element = expect_start_element(xml::content::complex);
1652
1653 if (current_element == qn("content-types", "Default"))
1654 {
1655 auto extension = parser.attribute("Extension");
1656 auto content_type = parser.attribute("ContentType");
1657 manifest.register_default_type(extension, content_type);
1658 }
1659 else if (current_element == qn("content-types", "Override"))
1660 {
1661 auto part_name = parser.attribute("PartName");
1662 auto content_type = parser.attribute("ContentType");
1663 manifest.register_override_type(path(part_name), content_type);
1664 }
1665 else
1666 {
1667 unexpected_element(current_element);
1668 }
1669
1670 expect_end_element(current_element);
1671 }
1672
1673 expect_end_element(qn("content-types", "Types"));
1674 }
1675
read_core_properties()1676 void xlsx_consumer::read_core_properties()
1677 {
1678 //qn("extended-properties", "Properties");
1679 //qn("custom-properties", "Properties");
1680 expect_start_element(qn("core-properties", "coreProperties"), xml::content::complex);
1681
1682 while (in_element(qn("core-properties", "coreProperties")))
1683 {
1684 const auto property_element = expect_start_element(xml::content::simple);
1685 const auto prop = detail::from_string<core_property>(property_element.name());
1686 if (prop == core_property::created || prop == core_property::modified)
1687 {
1688 skip_attribute(qn("xsi", "type"));
1689 }
1690 target_.core_property(prop, read_text());
1691 expect_end_element(property_element);
1692 }
1693
1694 expect_end_element(qn("core-properties", "coreProperties"));
1695 }
1696
read_extended_properties()1697 void xlsx_consumer::read_extended_properties()
1698 {
1699 expect_start_element(qn("extended-properties", "Properties"), xml::content::complex);
1700
1701 while (in_element(qn("extended-properties", "Properties")))
1702 {
1703 const auto property_element = expect_start_element(xml::content::mixed);
1704 const auto prop = detail::from_string<extended_property>(property_element.name());
1705 target_.extended_property(prop, read_variant());
1706 expect_end_element(property_element);
1707 }
1708
1709 expect_end_element(qn("extended-properties", "Properties"));
1710 }
1711
read_custom_properties()1712 void xlsx_consumer::read_custom_properties()
1713 {
1714 expect_start_element(qn("custom-properties", "Properties"), xml::content::complex);
1715
1716 while (in_element(qn("custom-properties", "Properties")))
1717 {
1718 const auto property_element = expect_start_element(xml::content::complex);
1719 const auto prop = parser().attribute("name");
1720 const auto format_id = parser().attribute("fmtid");
1721 const auto property_id = parser().attribute("pid");
1722 target_.custom_property(prop, read_variant());
1723 expect_end_element(property_element);
1724 }
1725
1726 expect_end_element(qn("custom-properties", "Properties"));
1727 }
1728
read_office_document(const std::string & content_type)1729 void xlsx_consumer::read_office_document(const std::string &content_type) // CT_Workbook
1730 {
1731 if (content_type !=
1732 "application/vnd."
1733 "openxmlformats-officedocument.spreadsheetml.sheet.main+xml"
1734 && content_type !=
1735 "application/vnd."
1736 "openxmlformats-officedocument.spreadsheetml.template.main+xml")
1737 {
1738 throw xlnt::invalid_file(content_type);
1739 }
1740
1741 target_.d_->calculation_properties_.clear();
1742
1743 expect_start_element(qn("workbook", "workbook"), xml::content::complex);
1744 skip_attribute(qn("mc", "Ignorable"));
1745
1746 while (in_element(qn("workbook", "workbook")))
1747 {
1748 auto current_workbook_element = expect_start_element(xml::content::complex);
1749
1750 if (current_workbook_element == qn("workbook", "fileVersion")) // CT_FileVersion 0-1
1751 {
1752 detail::workbook_impl::file_version_t file_version;
1753
1754 if (parser().attribute_present("appName"))
1755 {
1756 file_version.app_name = parser().attribute("appName");
1757 }
1758
1759 if (parser().attribute_present("lastEdited"))
1760 {
1761 file_version.last_edited = parser().attribute<std::size_t>("lastEdited");
1762 }
1763
1764 if (parser().attribute_present("lowestEdited"))
1765 {
1766 file_version.lowest_edited = parser().attribute<std::size_t>("lowestEdited");
1767 }
1768
1769 if (parser().attribute_present("lowestEdited"))
1770 {
1771 file_version.rup_build = parser().attribute<std::size_t>("rupBuild");
1772 }
1773
1774 skip_attribute("codeName");
1775
1776 target_.d_->file_version_ = file_version;
1777 }
1778 else if (current_workbook_element == qn("workbook", "fileSharing")) // CT_FileSharing 0-1
1779 {
1780 skip_remaining_content(current_workbook_element);
1781 }
1782 else if (current_workbook_element == qn("mc", "AlternateContent"))
1783 {
1784 while (in_element(qn("mc", "AlternateContent")))
1785 {
1786 auto alternate_content_element = expect_start_element(xml::content::complex);
1787
1788 if (alternate_content_element == qn("mc", "Choice")
1789 && parser().attribute_present("Requires")
1790 && parser().attribute("Requires") == "x15")
1791 {
1792 auto x15_element = expect_start_element(xml::content::simple);
1793
1794 if (x15_element == qn("x15ac", "absPath"))
1795 {
1796 target_.d_->abs_path_ = parser().attribute("url");
1797 }
1798
1799 skip_remaining_content(x15_element);
1800 expect_end_element(x15_element);
1801 }
1802
1803 skip_remaining_content(alternate_content_element);
1804 expect_end_element(alternate_content_element);
1805 }
1806 }
1807 else if (current_workbook_element == qn("workbook", "workbookPr")) // CT_WorkbookPr 0-1
1808 {
1809 target_.base_date(parser().attribute_present("date1904") // optional, bool=false
1810 && is_true(parser().attribute("date1904"))
1811 ? calendar::mac_1904
1812 : calendar::windows_1900);
1813 skip_attribute("showObjects"); // optional, ST_Objects="all"
1814 skip_attribute("showBorderUnselectedTables"); // optional, bool=true
1815 skip_attribute("filterPrivacy"); // optional, bool=false
1816 skip_attribute("promptedSolutions"); // optional, bool=false
1817 skip_attribute("showInkAnnotation"); // optional, bool=true
1818 skip_attribute("backupFile"); // optional, bool=false
1819 skip_attribute("saveExternalLinkValues"); // optional, bool=true
1820 skip_attribute("updateLinks"); // optional, ST_UpdateLinks="userSet"
1821 skip_attribute("codeName"); // optional, string
1822 skip_attribute("hidePivotFieldList"); // optional, bool=false
1823 skip_attribute("showPivotChartFilter"); // optional, bool=false
1824 skip_attribute("allowRefreshQuery"); // optional, bool=false
1825 skip_attribute("publishItems"); // optional, bool=false
1826 skip_attribute("checkCompatibility"); // optional, bool=false
1827 skip_attribute("autoCompressPictures"); // optional, bool=true
1828 skip_attribute("refreshAllConnections"); // optional, bool=false
1829 skip_attribute("defaultThemeVersion"); // optional, uint
1830 skip_attribute("dateCompatibility"); // optional, bool (undocumented)
1831 }
1832 else if (current_workbook_element == qn("workbook", "workbookProtection")) // CT_WorkbookProtection 0-1
1833 {
1834 skip_remaining_content(current_workbook_element);
1835 }
1836 else if (current_workbook_element == qn("workbook", "bookViews")) // CT_BookViews 0-1
1837 {
1838 while (in_element(qn("workbook", "bookViews")))
1839 {
1840 expect_start_element(qn("workbook", "workbookView"), xml::content::simple);
1841 skip_attributes({"firstSheet", "showHorizontalScroll",
1842 "showSheetTabs", "showVerticalScroll"});
1843
1844 workbook_view view;
1845
1846 if (parser().attribute_present("xWindow"))
1847 {
1848 view.x_window = parser().attribute<int>("xWindow");
1849 }
1850
1851 if (parser().attribute_present("yWindow"))
1852 {
1853 view.y_window = parser().attribute<int>("yWindow");
1854 }
1855
1856 if (parser().attribute_present("windowWidth"))
1857 {
1858 view.window_width = parser().attribute<std::size_t>("windowWidth");
1859 }
1860
1861 if (parser().attribute_present("windowHeight"))
1862 {
1863 view.window_height = parser().attribute<std::size_t>("windowHeight");
1864 }
1865
1866 if (parser().attribute_present("tabRatio"))
1867 {
1868 view.tab_ratio = parser().attribute<std::size_t>("tabRatio");
1869 }
1870
1871 if (parser().attribute_present("activeTab"))
1872 {
1873 view.active_tab = parser().attribute<std::size_t>("activeTab");
1874 }
1875
1876 target_.view(view);
1877
1878 skip_attributes();
1879 expect_end_element(qn("workbook", "workbookView"));
1880 }
1881 }
1882 else if (current_workbook_element == qn("workbook", "sheets")) // CT_Sheets 1
1883 {
1884 std::size_t index = 0;
1885
1886 while (in_element(qn("workbook", "sheets")))
1887 {
1888 expect_start_element(qn("spreadsheetml", "sheet"), xml::content::simple);
1889
1890 auto title = parser().attribute("name");
1891
1892 sheet_title_index_map_[title] = index++;
1893 sheet_title_id_map_[title] = parser().attribute<std::size_t>("sheetId");
1894 target_.d_->sheet_title_rel_id_map_[title] = parser().attribute(qn("r", "id"));
1895
1896 bool hidden = parser().attribute<std::string>("state", "") == "hidden";
1897 target_.d_->sheet_hidden_.push_back(hidden);
1898
1899 expect_end_element(qn("spreadsheetml", "sheet"));
1900 }
1901 }
1902 else if (current_workbook_element == qn("workbook", "functionGroups")) // CT_FunctionGroups 0-1
1903 {
1904 skip_remaining_content(current_workbook_element);
1905 }
1906 else if (current_workbook_element == qn("workbook", "externalReferences")) // CT_ExternalReferences 0-1
1907 {
1908 skip_remaining_content(current_workbook_element);
1909 }
1910 else if (current_workbook_element == qn("workbook", "definedNames")) // CT_DefinedNames 0-1
1911 {
1912 skip_remaining_content(current_workbook_element);
1913 }
1914 else if (current_workbook_element == qn("workbook", "calcPr")) // CT_CalcPr 0-1
1915 {
1916 xlnt::calculation_properties calc_props;
1917 if (parser().attribute_present("calcId"))
1918 {
1919 calc_props.calc_id = parser().attribute<std::size_t>("calcId");
1920 }
1921 if (parser().attribute_present("concurrentCalc"))
1922 {
1923 calc_props.concurrent_calc = is_true(parser().attribute("concurrentCalc"));
1924 }
1925 target_.calculation_properties(calc_props);
1926 parser().attribute_map(); // skip remaining
1927 }
1928 else if (current_workbook_element == qn("workbook", "oleSize")) // CT_OleSize 0-1
1929 {
1930 skip_remaining_content(current_workbook_element);
1931 }
1932 else if (current_workbook_element == qn("workbook", "customWorkbookViews")) // CT_CustomWorkbookViews 0-1
1933 {
1934 skip_remaining_content(current_workbook_element);
1935 }
1936 else if (current_workbook_element == qn("workbook", "pivotCaches")) // CT_PivotCaches 0-1
1937 {
1938 skip_remaining_content(current_workbook_element);
1939 }
1940 else if (current_workbook_element == qn("workbook", "smartTagPr")) // CT_SmartTagPr 0-1
1941 {
1942 skip_remaining_content(current_workbook_element);
1943 }
1944 else if (current_workbook_element == qn("workbook", "smartTagTypes")) // CT_SmartTagTypes 0-1
1945 {
1946 skip_remaining_content(current_workbook_element);
1947 }
1948 else if (current_workbook_element == qn("workbook", "webPublishing")) // CT_WebPublishing 0-1
1949 {
1950 skip_remaining_content(current_workbook_element);
1951 }
1952 else if (current_workbook_element == qn("workbook", "fileRecoveryPr")) // CT_FileRecoveryPr 0+
1953 {
1954 skip_remaining_content(current_workbook_element);
1955 }
1956 else if (current_workbook_element == qn("workbook", "webPublishObjects")) // CT_WebPublishObjects 0-1
1957 {
1958 skip_remaining_content(current_workbook_element);
1959 }
1960 else if (current_workbook_element == qn("workbook", "extLst")) // CT_ExtensionList 0-1
1961 {
1962 while (in_element(qn("workbook", "extLst")))
1963 {
1964 auto extension_element = expect_start_element(xml::content::complex);
1965
1966 if (extension_element == qn("workbook", "ext")
1967 && parser().attribute_present("uri")
1968 && parser().attribute("uri") == "{7523E5D3-25F3-A5E0-1632-64F254C22452}")
1969 {
1970 auto arch_id_extension_element = expect_start_element(xml::content::simple);
1971
1972 if (arch_id_extension_element == qn("mx", "ArchID"))
1973 {
1974 target_.d_->arch_id_flags_ = parser().attribute<std::size_t>("Flags");
1975 }
1976
1977 skip_remaining_content(arch_id_extension_element);
1978 expect_end_element(arch_id_extension_element);
1979 }
1980
1981 skip_remaining_content(extension_element);
1982 expect_end_element(extension_element);
1983 }
1984 }
1985 else
1986 {
1987 unexpected_element(current_workbook_element);
1988 }
1989
1990 expect_end_element(current_workbook_element);
1991 }
1992
1993 expect_end_element(qn("workbook", "workbook"));
1994
1995 auto workbook_rel = manifest().relationship(path("/"), relationship_type::office_document);
1996 auto workbook_path = workbook_rel.target().path();
1997
1998 if (manifest().has_relationship(workbook_path, relationship_type::shared_string_table))
1999 {
2000 read_part({workbook_rel,
2001 manifest().relationship(workbook_path,
2002 relationship_type::shared_string_table)});
2003 }
2004
2005 if (manifest().has_relationship(workbook_path, relationship_type::stylesheet))
2006 {
2007 read_part({workbook_rel,
2008 manifest().relationship(workbook_path,
2009 relationship_type::stylesheet)});
2010 }
2011
2012 if (manifest().has_relationship(workbook_path, relationship_type::theme))
2013 {
2014 read_part({workbook_rel,
2015 manifest().relationship(workbook_path,
2016 relationship_type::theme)});
2017 }
2018
2019 for (auto worksheet_rel : manifest().relationships(workbook_path, relationship_type::worksheet))
2020 {
2021 auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
2022 target_.d_->sheet_title_rel_id_map_.end(),
2023 [&](const std::pair<std::string, std::string> &p) {
2024 return p.second == worksheet_rel.id();
2025 })
2026 ->first;
2027
2028 auto id = sheet_title_id_map_[title];
2029 auto index = sheet_title_index_map_[title];
2030
2031 auto insertion_iter = target_.d_->worksheets_.begin();
2032 while (insertion_iter != target_.d_->worksheets_.end() && sheet_title_index_map_[insertion_iter->title_] < index)
2033 {
2034 ++insertion_iter;
2035 }
2036
2037 current_worksheet_ = &*target_.d_->worksheets_.emplace(insertion_iter, &target_, id, title);
2038
2039 if (!streaming_)
2040 {
2041 read_part({workbook_rel, worksheet_rel});
2042 }
2043 }
2044 }
2045
// Read Workbook Relationship Target Parts
2047
read_calculation_chain()2048 void xlsx_consumer::read_calculation_chain()
2049 {
2050 }
2051
read_chartsheet(const std::string &)2052 void xlsx_consumer::read_chartsheet(const std::string & /*title*/)
2053 {
2054 }
2055
read_connections()2056 void xlsx_consumer::read_connections()
2057 {
2058 }
2059
read_custom_property()2060 void xlsx_consumer::read_custom_property()
2061 {
2062 }
2063
read_custom_xml_mappings()2064 void xlsx_consumer::read_custom_xml_mappings()
2065 {
2066 }
2067
read_dialogsheet(const std::string &)2068 void xlsx_consumer::read_dialogsheet(const std::string & /*title*/)
2069 {
2070 }
2071
read_external_workbook_references()2072 void xlsx_consumer::read_external_workbook_references()
2073 {
2074 }
2075
read_pivot_table()2076 void xlsx_consumer::read_pivot_table()
2077 {
2078 }
2079
read_shared_string_table()2080 void xlsx_consumer::read_shared_string_table()
2081 {
2082 expect_start_element(qn("spreadsheetml", "sst"), xml::content::complex);
2083 skip_attributes({"count"});
2084
2085 bool has_unique_count = false;
2086 std::size_t unique_count = 0;
2087
2088 if (parser().attribute_present("uniqueCount"))
2089 {
2090 has_unique_count = true;
2091 unique_count = parser().attribute<std::size_t>("uniqueCount");
2092 }
2093
2094 while (in_element(qn("spreadsheetml", "sst")))
2095 {
2096 expect_start_element(qn("spreadsheetml", "si"), xml::content::complex);
2097 auto rt = read_rich_text(qn("spreadsheetml", "si"));
2098 target_.add_shared_string(rt, true);
2099 expect_end_element(qn("spreadsheetml", "si"));
2100 }
2101
2102 expect_end_element(qn("spreadsheetml", "sst"));
2103
2104 if (has_unique_count && unique_count != target_.shared_strings().size())
2105 {
2106 throw invalid_file("sizes don't match");
2107 }
2108 }
2109
read_shared_workbook_revision_headers()2110 void xlsx_consumer::read_shared_workbook_revision_headers()
2111 {
2112 }
2113
read_shared_workbook()2114 void xlsx_consumer::read_shared_workbook()
2115 {
2116 }
2117
read_shared_workbook_user_data()2118 void xlsx_consumer::read_shared_workbook_user_data()
2119 {
2120 }
2121
read_stylesheet()2122 void xlsx_consumer::read_stylesheet()
2123 {
2124 target_.impl().stylesheet_ = detail::stylesheet();
2125 auto &stylesheet = target_.impl().stylesheet_.get();
2126
2127 expect_start_element(qn("spreadsheetml", "styleSheet"), xml::content::complex);
2128 skip_attributes({qn("mc", "Ignorable")});
2129
2130 std::vector<std::pair<style_impl, std::size_t>> styles;
2131 std::vector<std::pair<format_impl, std::size_t>> format_records;
2132 std::vector<std::pair<format_impl, std::size_t>> style_records;
2133
2134 while (in_element(qn("spreadsheetml", "styleSheet")))
2135 {
2136 auto current_style_element = expect_start_element(xml::content::complex);
2137
2138 if (current_style_element == qn("spreadsheetml", "borders"))
2139 {
2140 auto &borders = stylesheet.borders;
2141 auto count = parser().attribute<std::size_t>("count");
2142
2143 while (in_element(qn("spreadsheetml", "borders")))
2144 {
2145 borders.push_back(xlnt::border());
2146 auto &border = borders.back();
2147
2148 expect_start_element(qn("spreadsheetml", "border"), xml::content::complex);
2149
2150 auto diagonal = diagonal_direction::neither;
2151
2152 if (parser().attribute_present("diagonalDown") && parser().attribute("diagonalDown") == "1")
2153 {
2154 diagonal = diagonal_direction::down;
2155 }
2156
2157 if (parser().attribute_present("diagonalUp") && parser().attribute("diagonalUp") == "1")
2158 {
2159 diagonal = diagonal == diagonal_direction::down ? diagonal_direction::both : diagonal_direction::up;
2160 }
2161
2162 if (diagonal != diagonal_direction::neither)
2163 {
2164 border.diagonal(diagonal);
2165 }
2166
2167 while (in_element(qn("spreadsheetml", "border")))
2168 {
2169 auto current_side_element = expect_start_element(xml::content::complex);
2170
2171 xlnt::border::border_property side;
2172
2173 if (parser().attribute_present("style"))
2174 {
2175 side.style(parser().attribute<xlnt::border_style>("style"));
2176 }
2177
2178 if (in_element(current_side_element))
2179 {
2180 expect_start_element(qn("spreadsheetml", "color"), xml::content::complex);
2181 side.color(read_color());
2182 expect_end_element(qn("spreadsheetml", "color"));
2183 }
2184
2185 expect_end_element(current_side_element);
2186
2187 auto side_type = xml::value_traits<xlnt::border_side>::parse(current_side_element.name(), parser());
2188 border.side(side_type, side);
2189 }
2190
2191 expect_end_element(qn("spreadsheetml", "border"));
2192 }
2193
2194 if (count != borders.size())
2195 {
2196 throw xlnt::exception("border counts don't match");
2197 }
2198 }
2199 else if (current_style_element == qn("spreadsheetml", "fills"))
2200 {
2201 auto &fills = stylesheet.fills;
2202 auto count = parser().attribute<std::size_t>("count");
2203
2204 while (in_element(qn("spreadsheetml", "fills")))
2205 {
2206 fills.push_back(xlnt::fill());
2207 auto &new_fill = fills.back();
2208
2209 expect_start_element(qn("spreadsheetml", "fill"), xml::content::complex);
2210 auto fill_element = expect_start_element(xml::content::complex);
2211
2212 if (fill_element == qn("spreadsheetml", "patternFill"))
2213 {
2214 xlnt::pattern_fill pattern;
2215
2216 if (parser().attribute_present("patternType"))
2217 {
2218 pattern.type(parser().attribute<xlnt::pattern_fill_type>("patternType"));
2219
2220 while (in_element(qn("spreadsheetml", "patternFill")))
2221 {
2222 auto pattern_type_element = expect_start_element(xml::content::complex);
2223
2224 if (pattern_type_element == qn("spreadsheetml", "fgColor"))
2225 {
2226 pattern.foreground(read_color());
2227 }
2228 else if (pattern_type_element == qn("spreadsheetml", "bgColor"))
2229 {
2230 pattern.background(read_color());
2231 }
2232 else
2233 {
2234 unexpected_element(pattern_type_element);
2235 }
2236
2237 expect_end_element(pattern_type_element);
2238 }
2239 }
2240
2241 new_fill = pattern;
2242 }
2243 else if (fill_element == qn("spreadsheetml", "gradientFill"))
2244 {
2245 xlnt::gradient_fill gradient;
2246
2247 if (parser().attribute_present("type"))
2248 {
2249 gradient.type(parser().attribute<xlnt::gradient_fill_type>("type"));
2250 }
2251 else
2252 {
2253 gradient.type(xlnt::gradient_fill_type::linear);
2254 }
2255
2256 while (in_element(qn("spreadsheetml", "gradientFill")))
2257 {
2258 expect_start_element(qn("spreadsheetml", "stop"), xml::content::complex);
2259 auto position = converter_.deserialise(parser().attribute("position"));
2260 expect_start_element(qn("spreadsheetml", "color"), xml::content::complex);
2261 auto color = read_color();
2262 expect_end_element(qn("spreadsheetml", "color"));
2263 expect_end_element(qn("spreadsheetml", "stop"));
2264
2265 gradient.add_stop(position, color);
2266 }
2267
2268 new_fill = gradient;
2269 }
2270 else
2271 {
2272 unexpected_element(fill_element);
2273 }
2274
2275 expect_end_element(fill_element);
2276 expect_end_element(qn("spreadsheetml", "fill"));
2277 }
2278
2279 if (count != fills.size())
2280 {
2281 throw xlnt::exception("counts don't match");
2282 }
2283 }
2284 else if (current_style_element == qn("spreadsheetml", "fonts"))
2285 {
2286 auto &fonts = stylesheet.fonts;
2287 auto count = parser().attribute<std::size_t>("count", 0);
2288
2289 if (parser().attribute_present(qn("x14ac", "knownFonts")))
2290 {
2291 target_.enable_known_fonts();
2292 }
2293
2294 while (in_element(qn("spreadsheetml", "fonts")))
2295 {
2296 fonts.push_back(xlnt::font());
2297 auto &new_font = stylesheet.fonts.back();
2298
2299 expect_start_element(qn("spreadsheetml", "font"), xml::content::complex);
2300
2301 while (in_element(qn("spreadsheetml", "font")))
2302 {
2303 auto font_property_element = expect_start_element(xml::content::simple);
2304
2305 if (font_property_element == qn("spreadsheetml", "sz"))
2306 {
2307 new_font.size(converter_.deserialise(parser().attribute("val")));
2308 }
2309 else if (font_property_element == qn("spreadsheetml", "name"))
2310 {
2311 new_font.name(parser().attribute("val"));
2312 }
2313 else if (font_property_element == qn("spreadsheetml", "color"))
2314 {
2315 new_font.color(read_color());
2316 }
2317 else if (font_property_element == qn("spreadsheetml", "family"))
2318 {
2319 new_font.family(parser().attribute<std::size_t>("val"));
2320 }
2321 else if (font_property_element == qn("spreadsheetml", "scheme"))
2322 {
2323 new_font.scheme(parser().attribute("val"));
2324 }
2325 else if (font_property_element == qn("spreadsheetml", "b"))
2326 {
2327 if (parser().attribute_present("val"))
2328 {
2329 new_font.bold(is_true(parser().attribute("val")));
2330 }
2331 else
2332 {
2333 new_font.bold(true);
2334 }
2335 }
2336 else if (font_property_element == qn("spreadsheetml", "vertAlign"))
2337 {
2338 auto vert_align = parser().attribute("val");
2339
2340 if (vert_align == "superscript")
2341 {
2342 new_font.superscript(true);
2343 }
2344 else if (vert_align == "subscript")
2345 {
2346 new_font.subscript(true);
2347 }
2348 }
2349 else if (font_property_element == qn("spreadsheetml", "strike"))
2350 {
2351 if (parser().attribute_present("val"))
2352 {
2353 new_font.strikethrough(is_true(parser().attribute("val")));
2354 }
2355 else
2356 {
2357 new_font.strikethrough(true);
2358 }
2359 }
2360 else if (font_property_element == qn("spreadsheetml", "outline"))
2361 {
2362 if (parser().attribute_present("val"))
2363 {
2364 new_font.outline(is_true(parser().attribute("val")));
2365 }
2366 else
2367 {
2368 new_font.outline(true);
2369 }
2370 }
2371 else if (font_property_element == qn("spreadsheetml", "shadow"))
2372 {
2373 if (parser().attribute_present("val"))
2374 {
2375 new_font.shadow(is_true(parser().attribute("val")));
2376 }
2377 else
2378 {
2379 new_font.shadow(true);
2380 }
2381 }
2382 else if (font_property_element == qn("spreadsheetml", "i"))
2383 {
2384 if (parser().attribute_present("val"))
2385 {
2386 new_font.italic(is_true(parser().attribute("val")));
2387 }
2388 else
2389 {
2390 new_font.italic(true);
2391 }
2392 }
2393 else if (font_property_element == qn("spreadsheetml", "u"))
2394 {
2395 if (parser().attribute_present("val"))
2396 {
2397 new_font.underline(parser().attribute<xlnt::font::underline_style>("val"));
2398 }
2399 else
2400 {
2401 new_font.underline(xlnt::font::underline_style::single);
2402 }
2403 }
2404 else if (font_property_element == qn("spreadsheetml", "charset"))
2405 {
2406 if (parser().attribute_present("val"))
2407 {
2408 parser().attribute("val");
2409 }
2410 }
2411 else
2412 {
2413 unexpected_element(font_property_element);
2414 }
2415
2416 expect_end_element(font_property_element);
2417 }
2418
2419 expect_end_element(qn("spreadsheetml", "font"));
2420 }
2421
2422 if (count != stylesheet.fonts.size())
2423 {
2424 // throw xlnt::exception("counts don't match");
2425 }
2426 }
2427 else if (current_style_element == qn("spreadsheetml", "numFmts"))
2428 {
2429 auto &number_formats = stylesheet.number_formats;
2430 auto count = parser().attribute<std::size_t>("count");
2431
2432 while (in_element(qn("spreadsheetml", "numFmts")))
2433 {
2434 expect_start_element(qn("spreadsheetml", "numFmt"), xml::content::simple);
2435
2436 auto format_string = parser().attribute("formatCode");
2437
2438 if (format_string == "GENERAL")
2439 {
2440 format_string = "General";
2441 }
2442
2443 xlnt::number_format nf;
2444
2445 nf.format_string(format_string);
2446 nf.id(parser().attribute<std::size_t>("numFmtId"));
2447
2448 expect_end_element(qn("spreadsheetml", "numFmt"));
2449
2450 number_formats.push_back(nf);
2451 }
2452
2453 if (count != number_formats.size())
2454 {
2455 throw xlnt::exception("counts don't match");
2456 }
2457 }
2458 else if (current_style_element == qn("spreadsheetml", "cellStyles"))
2459 {
2460 auto count = parser().attribute<std::size_t>("count");
2461
2462 while (in_element(qn("spreadsheetml", "cellStyles")))
2463 {
2464 auto &data = *styles.emplace(styles.end());
2465
2466 expect_start_element(qn("spreadsheetml", "cellStyle"), xml::content::simple);
2467
2468 data.first.name = parser().attribute("name");
2469 data.second = parser().attribute<std::size_t>("xfId");
2470
2471 if (parser().attribute_present("builtinId"))
2472 {
2473 data.first.builtin_id = parser().attribute<std::size_t>("builtinId");
2474 }
2475
2476 if (parser().attribute_present("hidden"))
2477 {
2478 data.first.hidden_style = is_true(parser().attribute("hidden"));
2479 }
2480
2481 if (parser().attribute_present("customBuiltin"))
2482 {
2483 data.first.custom_builtin = is_true(parser().attribute("customBuiltin"));
2484 }
2485
2486 expect_end_element(qn("spreadsheetml", "cellStyle"));
2487 }
2488
2489 if (count != styles.size())
2490 {
2491 throw xlnt::exception("counts don't match");
2492 }
2493 }
2494 else if (current_style_element == qn("spreadsheetml", "cellStyleXfs")
2495 || current_style_element == qn("spreadsheetml", "cellXfs"))
2496 {
2497 auto in_style_records = current_style_element.name() == "cellStyleXfs";
2498 auto count = parser().attribute<std::size_t>("count");
2499
2500 while (in_element(current_style_element))
2501 {
2502 expect_start_element(qn("spreadsheetml", "xf"), xml::content::complex);
2503
2504 auto &record = *(!in_style_records
2505 ? format_records.emplace(format_records.end())
2506 : style_records.emplace(style_records.end()));
2507
2508 if (parser().attribute_present("applyBorder"))
2509 {
2510 record.first.border_applied = is_true(parser().attribute("applyBorder"));
2511 }
2512 record.first.border_id = parser().attribute_present("borderId")
2513 ? parser().attribute<std::size_t>("borderId")
2514 : optional<std::size_t>();
2515
2516 if (parser().attribute_present("applyFill"))
2517 {
2518 record.first.fill_applied = is_true(parser().attribute("applyFill"));
2519 }
2520 record.first.fill_id = parser().attribute_present("fillId")
2521 ? parser().attribute<std::size_t>("fillId")
2522 : optional<std::size_t>();
2523
2524 if (parser().attribute_present("applyFont"))
2525 {
2526 record.first.font_applied = is_true(parser().attribute("applyFont"));
2527 }
2528 record.first.font_id = parser().attribute_present("fontId")
2529 ? parser().attribute<std::size_t>("fontId")
2530 : optional<std::size_t>();
2531
2532 if (parser().attribute_present("applyNumberFormat"))
2533 {
2534 record.first.number_format_applied = is_true(parser().attribute("applyNumberFormat"));
2535 }
2536 record.first.number_format_id = parser().attribute_present("numFmtId")
2537 ? parser().attribute<std::size_t>("numFmtId")
2538 : optional<std::size_t>();
2539
2540 auto apply_alignment_present = parser().attribute_present("applyAlignment");
2541 if (apply_alignment_present)
2542 {
2543 record.first.alignment_applied = is_true(parser().attribute("applyAlignment"));
2544 }
2545
2546 auto apply_protection_present = parser().attribute_present("applyProtection");
2547 if (apply_protection_present)
2548 {
2549 record.first.protection_applied = is_true(parser().attribute("applyProtection"));
2550 }
2551
2552 record.first.pivot_button_ = parser().attribute_present("pivotButton")
2553 && is_true(parser().attribute("pivotButton"));
2554 record.first.quote_prefix_ = parser().attribute_present("quotePrefix")
2555 && is_true(parser().attribute("quotePrefix"));
2556
2557 if (parser().attribute_present("xfId"))
2558 {
2559 record.second = parser().attribute<std::size_t>("xfId");
2560 }
2561
2562 while (in_element(qn("spreadsheetml", "xf")))
2563 {
2564 auto xf_child_element = expect_start_element(xml::content::simple);
2565
2566 if (xf_child_element == qn("spreadsheetml", "alignment"))
2567 {
2568 record.first.alignment_id = stylesheet.alignments.size();
2569 auto &alignment = *stylesheet.alignments.emplace(stylesheet.alignments.end());
2570
2571 if (parser().attribute_present("wrapText"))
2572 {
2573 alignment.wrap(is_true(parser().attribute("wrapText")));
2574 }
2575
2576 if (parser().attribute_present("shrinkToFit"))
2577 {
2578 alignment.shrink(is_true(parser().attribute("shrinkToFit")));
2579 }
2580
2581 if (parser().attribute_present("indent"))
2582 {
2583 alignment.indent(parser().attribute<int>("indent"));
2584 }
2585
2586 if (parser().attribute_present("textRotation"))
2587 {
2588 alignment.rotation(parser().attribute<int>("textRotation"));
2589 }
2590
2591 if (parser().attribute_present("vertical"))
2592 {
2593 alignment.vertical(parser().attribute<xlnt::vertical_alignment>("vertical"));
2594 }
2595
2596 if (parser().attribute_present("horizontal"))
2597 {
2598 alignment.horizontal(parser().attribute<xlnt::horizontal_alignment>("horizontal"));
2599 }
2600
2601 if (parser().attribute_present("readingOrder"))
2602 {
2603 parser().attribute<int>("readingOrder");
2604 }
2605 }
2606 else if (xf_child_element == qn("spreadsheetml", "protection"))
2607 {
2608 record.first.protection_id = stylesheet.protections.size();
2609 auto &protection = *stylesheet.protections.emplace(stylesheet.protections.end());
2610
2611 protection.locked(parser().attribute_present("locked")
2612 && is_true(parser().attribute("locked")));
2613 protection.hidden(parser().attribute_present("hidden")
2614 && is_true(parser().attribute("hidden")));
2615 }
2616 else
2617 {
2618 unexpected_element(xf_child_element);
2619 }
2620
2621 expect_end_element(xf_child_element);
2622 }
2623
2624 expect_end_element(qn("spreadsheetml", "xf"));
2625 }
2626
2627 if ((in_style_records && count != style_records.size())
2628 || (!in_style_records && count != format_records.size()))
2629 {
2630 throw xlnt::exception("counts don't match");
2631 }
2632 }
2633 else if (current_style_element == qn("spreadsheetml", "dxfs"))
2634 {
2635 auto count = parser().attribute<std::size_t>("count");
2636 std::size_t processed = 0;
2637
2638 while (in_element(current_style_element))
2639 {
2640 auto current_element = expect_start_element(xml::content::mixed);
2641 skip_remaining_content(current_element);
2642 expect_end_element(current_element);
2643 ++processed;
2644 }
2645
2646 if (count != processed)
2647 {
2648 throw xlnt::exception("counts don't match");
2649 }
2650 }
2651 else if (current_style_element == qn("spreadsheetml", "tableStyles"))
2652 {
2653 skip_attribute("defaultTableStyle");
2654 skip_attribute("defaultPivotStyle");
2655
2656 auto count = parser().attribute<std::size_t>("count");
2657 std::size_t processed = 0;
2658
2659 while (in_element(qn("spreadsheetml", "tableStyles")))
2660 {
2661 auto current_element = expect_start_element(xml::content::complex);
2662 skip_remaining_content(current_element);
2663 expect_end_element(current_element);
2664 ++processed;
2665 }
2666
2667 if (count != processed)
2668 {
2669 throw xlnt::exception("counts don't match");
2670 }
2671 }
2672 else if (current_style_element == qn("spreadsheetml", "extLst"))
2673 {
2674 while (in_element(qn("spreadsheetml", "extLst")))
2675 {
2676 expect_start_element(qn("spreadsheetml", "ext"), xml::content::complex);
2677
2678 const auto uri = parser().attribute("uri");
2679
2680 if (uri == "{EB79DEF2-80B8-43e5-95BD-54CBDDF9020C}") // slicerStyles
2681 {
2682 expect_start_element(qn("x14", "slicerStyles"), xml::content::simple);
2683 stylesheet.default_slicer_style = parser().attribute("defaultSlicerStyle");
2684 expect_end_element(qn("x14", "slicerStyles"));
2685 }
2686 else
2687 {
2688 skip_remaining_content(qn("spreadsheetml", "ext"));
2689 }
2690
2691 expect_end_element(qn("spreadsheetml", "ext"));
2692 }
2693 }
2694 else if (current_style_element == qn("spreadsheetml", "colors")) // CT_Colors 0-1
2695 {
2696 while (in_element(qn("spreadsheetml", "colors")))
2697 {
2698 auto colors_child_element = expect_start_element(xml::content::complex);
2699
2700 if (colors_child_element == qn("spreadsheetml", "indexedColors")) // CT_IndexedColors 0-1
2701 {
2702 while (in_element(colors_child_element))
2703 {
2704 expect_start_element(qn("spreadsheetml", "rgbColor"), xml::content::simple);
2705 stylesheet.colors.push_back(read_color());
2706 expect_end_element(qn("spreadsheetml", "rgbColor"));
2707 }
2708 }
2709 else if (colors_child_element == qn("spreadsheetml", "mruColors")) // CT_MRUColors
2710 {
2711 skip_remaining_content(colors_child_element);
2712 }
2713 else
2714 {
2715 unexpected_element(colors_child_element);
2716 }
2717
2718 expect_end_element(colors_child_element);
2719 }
2720 }
2721 else
2722 {
2723 unexpected_element(current_style_element);
2724 }
2725
2726 expect_end_element(current_style_element);
2727 }
2728
2729 expect_end_element(qn("spreadsheetml", "styleSheet"));
2730
2731 std::size_t xf_id = 0;
2732
2733 for (const auto &record : style_records)
2734 {
2735 auto style_iter = std::find_if(styles.begin(), styles.end(),
2736 [&xf_id](const std::pair<style_impl, std::size_t> &s) { return s.second == xf_id; });
2737 ++xf_id;
2738
2739 if (style_iter == styles.end()) continue;
2740
2741 auto new_style = stylesheet.create_style(style_iter->first.name);
2742
2743 new_style.d_->pivot_button_ = style_iter->first.pivot_button_;
2744 new_style.d_->quote_prefix_ = style_iter->first.quote_prefix_;
2745 new_style.d_->formatting_record_id = style_iter->first.formatting_record_id;
2746 new_style.d_->hidden_style = style_iter->first.hidden_style;
2747 new_style.d_->custom_builtin = style_iter->first.custom_builtin;
2748 new_style.d_->hidden_style = style_iter->first.hidden_style;
2749 new_style.d_->builtin_id = style_iter->first.builtin_id;
2750 new_style.d_->outline_style = style_iter->first.outline_style;
2751
2752 new_style.d_->alignment_applied = record.first.alignment_applied;
2753 new_style.d_->alignment_id = record.first.alignment_id;
2754 new_style.d_->border_applied = record.first.border_applied;
2755 new_style.d_->border_id = record.first.border_id;
2756 new_style.d_->fill_applied = record.first.fill_applied;
2757 new_style.d_->fill_id = record.first.fill_id;
2758 new_style.d_->font_applied = record.first.font_applied;
2759 new_style.d_->font_id = record.first.font_id;
2760 new_style.d_->number_format_applied = record.first.number_format_applied;
2761 new_style.d_->number_format_id = record.first.number_format_id;
2762 }
2763
2764 std::size_t record_index = 0;
2765
2766 for (const auto &record : format_records)
2767 {
2768 stylesheet.format_impls.push_back(format_impl());
2769 auto &new_format = stylesheet.format_impls.back();
2770
2771 new_format.id = record_index++;
2772 new_format.parent = &stylesheet;
2773
2774 ++new_format.references;
2775
2776 new_format.alignment_id = record.first.alignment_id;
2777 new_format.alignment_applied = record.first.alignment_applied;
2778 new_format.border_id = record.first.border_id;
2779 new_format.border_applied = record.first.border_applied;
2780 new_format.fill_id = record.first.fill_id;
2781 new_format.fill_applied = record.first.fill_applied;
2782 new_format.font_id = record.first.font_id;
2783 new_format.font_applied = record.first.font_applied;
2784 new_format.number_format_id = record.first.number_format_id;
2785 new_format.number_format_applied = record.first.number_format_applied;
2786 new_format.protection_id = record.first.protection_id;
2787 new_format.protection_applied = record.first.protection_applied;
2788 new_format.pivot_button_ = record.first.pivot_button_;
2789 new_format.quote_prefix_ = record.first.quote_prefix_;
2790
2791 set_style_by_xfid(styles, record.second, new_format.style);
2792 }
2793 }
2794
read_theme()2795 void xlsx_consumer::read_theme()
2796 {
2797 auto workbook_rel = manifest().relationship(path("/"),
2798 relationship_type::office_document);
2799 auto theme_rel = manifest().relationship(workbook_rel.target().path(),
2800 relationship_type::theme);
2801 auto theme_path = manifest().canonicalize({workbook_rel, theme_rel});
2802
2803 target_.theme(theme());
2804
2805 if (manifest().has_relationship(theme_path, relationship_type::image))
2806 {
2807 read_part({workbook_rel, theme_rel,
2808 manifest().relationship(theme_path,
2809 relationship_type::image)});
2810 }
2811 }
2812
read_volatile_dependencies()2813 void xlsx_consumer::read_volatile_dependencies()
2814 {
2815 }
2816
2817 // Sheet Relationship Target Parts
2818
read_vml_drawings(worksheet)2819 void xlsx_consumer::read_vml_drawings(worksheet /*ws*/)
2820 {
2821 }
2822
read_comments(worksheet ws)2823 void xlsx_consumer::read_comments(worksheet ws)
2824 {
2825 std::vector<std::string> authors;
2826
2827 expect_start_element(qn("spreadsheetml", "comments"), xml::content::complex);
2828 // name space can be ignored
2829 skip_attribute(qn("mc", "Ignorable"));
2830 expect_start_element(qn("spreadsheetml", "authors"), xml::content::complex);
2831
2832 while (in_element(qn("spreadsheetml", "authors")))
2833 {
2834 expect_start_element(qn("spreadsheetml", "author"), xml::content::simple);
2835 authors.push_back(read_text());
2836 expect_end_element(qn("spreadsheetml", "author"));
2837 }
2838
2839 expect_end_element(qn("spreadsheetml", "authors"));
2840 expect_start_element(qn("spreadsheetml", "commentList"), xml::content::complex);
2841
2842 while (in_element(xml::qname(qn("spreadsheetml", "commentList"))))
2843 {
2844 expect_start_element(qn("spreadsheetml", "comment"), xml::content::complex);
2845
2846 skip_attribute("shapeId");
2847 auto cell_ref = parser().attribute("ref");
2848 auto author_id = parser().attribute<std::size_t>("authorId");
2849
2850 expect_start_element(qn("spreadsheetml", "text"), xml::content::complex);
2851
2852 ws.cell(cell_ref).comment(comment(read_rich_text(qn("spreadsheetml", "text")), authors.at(author_id)));
2853
2854 expect_end_element(qn("spreadsheetml", "text"));
2855
2856 if (in_element(xml::qname(qn("spreadsheetml", "comment"))))
2857 {
2858 expect_start_element(qn("mc", "AlternateContent"), xml::content::complex);
2859 skip_remaining_content(qn("mc", "AlternateContent"));
2860 expect_end_element(qn("mc", "AlternateContent"));
2861 }
2862
2863 expect_end_element(qn("spreadsheetml", "comment"));
2864 }
2865
2866 expect_end_element(qn("spreadsheetml", "commentList"));
2867 expect_end_element(qn("spreadsheetml", "comments"));
2868 }
2869
read_drawings(worksheet ws,const path & part)2870 void xlsx_consumer::read_drawings(worksheet ws, const path &part)
2871 {
2872 auto images = manifest().relationships(part, relationship_type::image);
2873
2874 auto sd = drawing::spreadsheet_drawing(parser());
2875
2876 for (const auto &image_rel_id : sd.get_embed_ids())
2877 {
2878 auto image_rel = std::find_if(images.begin(), images.end(),
2879 [&](const relationship &r) { return r.id() == image_rel_id; });
2880
2881 if (image_rel != images.end())
2882 {
2883 const auto url = image_rel->target().path().resolve(part.parent());
2884
2885 read_image(url);
2886 }
2887 }
2888
2889 ws.d_->drawing_ = sd;
2890 }
2891
2892 // Unknown Parts
2893
read_unknown_parts()2894 void xlsx_consumer::read_unknown_parts()
2895 {
2896 }
2897
read_unknown_relationships()2898 void xlsx_consumer::read_unknown_relationships()
2899 {
2900 }
2901
read_image(const xlnt::path & image_path)2902 void xlsx_consumer::read_image(const xlnt::path &image_path)
2903 {
2904 auto image_streambuf = archive_->open(image_path);
2905 vector_ostreambuf buffer(target_.d_->images_[image_path.string()]);
2906 std::ostream out_stream(&buffer);
2907 out_stream << image_streambuf.get();
2908 }
2909
read_text()2910 std::string xlsx_consumer::read_text()
2911 {
2912 auto text = std::string();
2913
2914 while (parser().peek() == xml::parser::event_type::characters)
2915 {
2916 parser().next_expect(xml::parser::event_type::characters);
2917 text.append(parser().value());
2918 }
2919
2920 return text;
2921 }
2922
read_variant()2923 variant xlsx_consumer::read_variant()
2924 {
2925 auto value = variant(read_text());
2926
2927 if (in_element(stack_.back()))
2928 {
2929 auto element = expect_start_element(xml::content::mixed);
2930 auto text = read_text();
2931
2932 if (element == qn("vt", "lpwstr") || element == qn("vt", "lpstr"))
2933 {
2934 value = variant(text);
2935 }
2936 if (element == qn("vt", "i4"))
2937 {
2938 value = variant(std::stoi(text));
2939 }
2940 if (element == qn("vt", "bool"))
2941 {
2942 value = variant(is_true(text));
2943 }
2944 else if (element == qn("vt", "vector"))
2945 {
2946 auto size = parser().attribute<std::size_t>("size");
2947 auto base_type = parser().attribute("baseType");
2948
2949 std::vector<variant> vector;
2950
2951 for (auto i = std::size_t(0); i < size; ++i)
2952 {
2953 if (base_type == "variant")
2954 {
2955 expect_start_element(qn("vt", "variant"), xml::content::complex);
2956 }
2957
2958 vector.push_back(read_variant());
2959
2960 if (base_type == "variant")
2961 {
2962 expect_end_element(qn("vt", "variant"));
2963 read_text();
2964 }
2965 }
2966
2967 value = variant(vector);
2968 }
2969
2970 expect_end_element(element);
2971 read_text();
2972 }
2973
2974 return value;
2975 }
2976
skip_attributes(const std::vector<std::string> & names)2977 void xlsx_consumer::skip_attributes(const std::vector<std::string> &names)
2978 {
2979 for (const auto &name : names)
2980 {
2981 if (parser().attribute_present(name))
2982 {
2983 parser().attribute(name);
2984 }
2985 }
2986 }
2987
skip_attributes(const std::vector<xml::qname> & names)2988 void xlsx_consumer::skip_attributes(const std::vector<xml::qname> &names)
2989 {
2990 for (const auto &name : names)
2991 {
2992 if (parser().attribute_present(name))
2993 {
2994 parser().attribute(name);
2995 }
2996 }
2997 }
2998
skip_attributes()2999 void xlsx_consumer::skip_attributes()
3000 {
3001 parser().attribute_map();
3002 }
3003
skip_attribute(const xml::qname & name)3004 void xlsx_consumer::skip_attribute(const xml::qname &name)
3005 {
3006 if (parser().attribute_present(name))
3007 {
3008 parser().attribute(name);
3009 }
3010 }
3011
skip_attribute(const std::string & name)3012 void xlsx_consumer::skip_attribute(const std::string &name)
3013 {
3014 if (parser().attribute_present(name))
3015 {
3016 parser().attribute(name);
3017 }
3018 }
3019
skip_remaining_content(const xml::qname & name)3020 void xlsx_consumer::skip_remaining_content(const xml::qname &name)
3021 {
3022 // start by assuming we've already parsed the opening tag
3023
3024 skip_attributes();
3025 read_text();
3026
3027 // continue until the closing tag is reached
3028 while (in_element(name))
3029 {
3030 auto child_element = expect_start_element(xml::content::mixed);
3031 skip_remaining_content(child_element);
3032 expect_end_element(child_element);
3033 read_text(); // trailing character content (usually whitespace)
3034 }
3035 }
3036
in_element(const xml::qname & name)3037 bool xlsx_consumer::in_element(const xml::qname &name)
3038 {
3039 return parser().peek() != xml::parser::event_type::end_element
3040 && stack_.back() == name;
3041 }
3042
expect_start_element(xml::content content)3043 xml::qname xlsx_consumer::expect_start_element(xml::content content)
3044 {
3045 parser().next_expect(xml::parser::event_type::start_element);
3046 parser().content(content);
3047 stack_.push_back(parser().qname());
3048
3049 const auto xml_space = qn("xml", "space");
3050 preserve_space_ = parser().attribute_present(xml_space) ? parser().attribute(xml_space) == "preserve" : false;
3051
3052 return stack_.back();
3053 }
3054
expect_start_element(const xml::qname & name,xml::content content)3055 void xlsx_consumer::expect_start_element(const xml::qname &name, xml::content content)
3056 {
3057 parser().next_expect(xml::parser::event_type::start_element, name);
3058 parser().content(content);
3059 stack_.push_back(name);
3060
3061 const auto xml_space = qn("xml", "space");
3062 preserve_space_ = parser().attribute_present(xml_space) ? parser().attribute(xml_space) == "preserve" : false;
3063 }
3064
expect_end_element(const xml::qname & name)3065 void xlsx_consumer::expect_end_element(const xml::qname &name)
3066 {
3067 parser().attribute_map();
3068 parser().next_expect(xml::parser::event_type::end_element, name);
3069 stack_.pop_back();
3070 }
3071
unexpected_element(const xml::qname & name)3072 void xlsx_consumer::unexpected_element(const xml::qname &name)
3073 {
3074 #ifdef THROW_ON_INVALID_XML
3075 throw xlnt::exception(name.string());
3076 #else
3077 skip_remaining_content(name);
3078 #endif
3079 }
3080
read_rich_text(const xml::qname & parent)3081 rich_text xlsx_consumer::read_rich_text(const xml::qname &parent)
3082 {
3083 const auto &xmlns = parent.namespace_();
3084 rich_text t;
3085
3086 while (in_element(parent))
3087 {
3088 auto text_element = expect_start_element(xml::content::mixed);
3089 const auto xml_space = qn("xml", "space");
3090 const auto preserve_space = parser().attribute_present(xml_space)
3091 ? parser().attribute(xml_space) == "preserve"
3092 : false;
3093 skip_attributes();
3094 auto text = read_text();
3095
3096 if (text_element == xml::qname(xmlns, "t"))
3097 {
3098 t.plain_text(text, preserve_space);
3099 }
3100 else if (text_element == xml::qname(xmlns, "r"))
3101 {
3102 rich_text_run run;
3103 run.preserve_space = preserve_space;
3104
3105 while (in_element(xml::qname(xmlns, "r")))
3106 {
3107 auto run_element = expect_start_element(xml::content::mixed);
3108 auto run_text = read_text();
3109
3110 if (run_element == xml::qname(xmlns, "rPr"))
3111 {
3112 run.second = xlnt::font();
3113
3114 while (in_element(xml::qname(xmlns, "rPr")))
3115 {
3116 auto current_run_property_element = expect_start_element(xml::content::simple);
3117
3118 if (current_run_property_element == xml::qname(xmlns, "sz"))
3119 {
3120 run.second.get().size(converter_.deserialise(parser().attribute("val")));
3121 }
3122 else if (current_run_property_element == xml::qname(xmlns, "rFont"))
3123 {
3124 run.second.get().name(parser().attribute("val"));
3125 }
3126 else if (current_run_property_element == xml::qname(xmlns, "color"))
3127 {
3128 run.second.get().color(read_color());
3129 }
3130 else if (current_run_property_element == xml::qname(xmlns, "family"))
3131 {
3132 run.second.get().family(parser().attribute<std::size_t>("val"));
3133 }
3134 else if (current_run_property_element == xml::qname(xmlns, "charset"))
3135 {
3136 run.second.get().charset(parser().attribute<std::size_t>("val"));
3137 }
3138 else if (current_run_property_element == xml::qname(xmlns, "scheme"))
3139 {
3140 run.second.get().scheme(parser().attribute("val"));
3141 }
3142 else if (current_run_property_element == xml::qname(xmlns, "b"))
3143 {
3144 run.second.get().bold(parser().attribute_present("val")
3145 ? is_true(parser().attribute("val"))
3146 : true);
3147 }
3148 else if (current_run_property_element == xml::qname(xmlns, "i"))
3149 {
3150 run.second.get().italic(parser().attribute_present("val")
3151 ? is_true(parser().attribute("val"))
3152 : true);
3153 }
3154 else if (current_run_property_element == xml::qname(xmlns, "u"))
3155 {
3156 if (parser().attribute_present("val"))
3157 {
3158 run.second.get().underline(parser().attribute<font::underline_style>("val"));
3159 }
3160 else
3161 {
3162 run.second.get().underline(font::underline_style::single);
3163 }
3164 }
3165 else if (current_run_property_element == xml::qname(xmlns, "strike"))
3166 {
3167 run.second.get().strikethrough(parser().attribute_present("val")
3168 ? is_true(parser().attribute("val"))
3169 : true);
3170 }
3171 else
3172 {
3173 unexpected_element(current_run_property_element);
3174 }
3175
3176 expect_end_element(current_run_property_element);
3177 read_text();
3178 }
3179 }
3180 else if (run_element == xml::qname(xmlns, "t"))
3181 {
3182 run.first = run_text;
3183 }
3184 else
3185 {
3186 unexpected_element(run_element);
3187 }
3188
3189 read_text();
3190 expect_end_element(run_element);
3191 read_text();
3192 }
3193
3194 t.add_run(run);
3195 }
3196 else if (text_element == xml::qname(xmlns, "rPh"))
3197 {
3198 phonetic_run pr;
3199 pr.start = parser().attribute<std::uint32_t>("sb");
3200 pr.end = parser().attribute<std::uint32_t>("eb");
3201
3202 expect_start_element(xml::qname(xmlns, "t"), xml::content::simple);
3203 pr.text = read_text();
3204
3205 if (parser().attribute_present(xml_space))
3206 {
3207 pr.preserve_space = parser().attribute(xml_space) == "preserve";
3208 }
3209
3210 expect_end_element(xml::qname(xmlns, "t"));
3211
3212 t.add_phonetic_run(pr);
3213 }
3214 else if (text_element == xml::qname(xmlns, "phoneticPr"))
3215 {
3216 phonetic_pr ph(parser().attribute<phonetic_pr::font_id_t>("fontId"));
3217 if (parser().attribute_present("type"))
3218 {
3219 ph.type(phonetic_pr::type_from_string(parser().attribute("type")));
3220 }
3221 if (parser().attribute_present("alignment"))
3222 {
3223 ph.alignment(phonetic_pr::alignment_from_string(parser().attribute("alignment")));
3224 }
3225 t.phonetic_properties(ph);
3226 }
3227 else
3228 {
3229 unexpected_element(text_element);
3230 }
3231
3232 read_text();
3233 expect_end_element(text_element);
3234 }
3235
3236 return t;
3237 }
3238
read_color()3239 xlnt::color xlsx_consumer::read_color()
3240 {
3241 xlnt::color result;
3242
3243 if (parser().attribute_present("auto") && is_true(parser().attribute("auto")))
3244 {
3245 result.auto_(true);
3246 return result;
3247 }
3248
3249 if (parser().attribute_present("rgb"))
3250 {
3251 result = xlnt::rgb_color(parser().attribute("rgb"));
3252 }
3253 else if (parser().attribute_present("theme"))
3254 {
3255 result = xlnt::theme_color(parser().attribute<std::size_t>("theme"));
3256 }
3257 else if (parser().attribute_present("indexed"))
3258 {
3259 result = xlnt::indexed_color(parser().attribute<std::size_t>("indexed"));
3260 }
3261
3262 if (parser().attribute_present("tint"))
3263 {
3264 result.tint(converter_.deserialise(parser().attribute("tint")));
3265 }
3266
3267 return result;
3268 }
3269
manifest()3270 manifest &xlsx_consumer::manifest()
3271 {
3272 return target_.manifest();
3273 }
3274
3275 } // namespace detail
3276 } // namespace xlnt
3277