1 // Copyright (c) 2014-2020 Thomas Fussell
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE
20 //
21 // @license: http://www.opensource.org/licenses/mit-license.php
22 // @author: see AUTHORS file
23 
24 #include <cassert>
25 #include <cctype>
26 #include <numeric> // for std::accumulate
27 #include <sstream>
28 #include <unordered_map>
29 
30 #include <xlnt/cell/cell.hpp>
31 #include <xlnt/cell/comment.hpp>
32 #include <xlnt/cell/hyperlink.hpp>
33 #include <xlnt/drawing/spreadsheet_drawing.hpp>
34 #include <xlnt/packaging/manifest.hpp>
35 #include <xlnt/utils/optional.hpp>
36 #include <xlnt/utils/path.hpp>
37 #include <xlnt/workbook/workbook.hpp>
38 #include <xlnt/worksheet/selection.hpp>
39 #include <xlnt/worksheet/worksheet.hpp>
40 #include <detail/constants.hpp>
41 #include <detail/header_footer/header_footer_code.hpp>
42 #include <detail/implementations/workbook_impl.hpp>
43 #include <detail/serialization/custom_value_traits.hpp>
44 #include <detail/serialization/serialisation_helpers.hpp>
45 #include <detail/serialization/vector_streambuf.hpp>
46 #include <detail/serialization/xlsx_consumer.hpp>
47 #include <detail/serialization/zstream.hpp>
48 
49 namespace {
/// string_arr_loop_equal
/// compares the first N - 1 characters of lhs with the string literal rhs
/// (the literal's terminating '\0' is not compared)
/// the length of lhs is NOT checked here; string_equal performs that check before calling
template <size_t N>
inline bool string_arr_loop_equal(const std::string &lhs, const char (&rhs)[N])
{
    size_t matched = 0;
    // advance while characters agree; stop at the first mismatch or after N - 1 characters
    while (matched < N - 1 && lhs[matched] == rhs[matched])
    {
        ++matched;
    }
    return matched == N - 1;
}
65 
/// string_equal
/// for comparison between std::string and string literals
/// the literal's length (N - 1) is known at compile time, so a size mismatch
/// is rejected before any character is compared
template <size_t N>
inline bool string_equal(const std::string &lhs, const char (&rhs)[N])
{
    // the character loop lives in its own function to assist with inlining of the size check
    return lhs.size() == N - 1 && string_arr_loop_equal(lhs, rhs);
}
76 
qn(const std::string & namespace_,const std::string & name)77 xml::qname &qn(const std::string &namespace_, const std::string &name)
78 {
79     using qname_map = std::unordered_map<std::string, xml::qname>;
80     static auto memo = std::unordered_map<std::string, qname_map>();
81 
82     auto &ns_memo = memo[namespace_];
83 
84     if (ns_memo.find(name) == ns_memo.end())
85     {
86         return ns_memo.emplace(name, xml::qname(xlnt::constants::ns(namespace_), name)).first->second;
87     }
88 
89     return ns_memo[name];
90 }
91 
/// <summary>
/// Returns true if bool_string is "1" or "true" (the true values of xsd:boolean).
/// When THROW_ON_INVALID_XML is defined, any string other than "0", "1", "true"
/// or "false" raises xlnt::exception; otherwise every other string yields false.
/// </summary>
bool is_true(const std::string &bool_string)
{
    const bool truthy = bool_string == "1" || bool_string == "true";

#ifdef THROW_ON_INVALID_XML
    // strict mode: reject anything that is not a recognised boolean spelling
    if (!truthy && bool_string != "0" && bool_string != "false")
    {
        throw xlnt::exception("xsd:boolean should be one of: 0, 1, true, or false, found " + bool_string);
    }
#endif

    return truthy;
}
114 
115 using style_id_pair = std::pair<xlnt::detail::style_impl, std::size_t>;
116 
117 /// <summary>
118 /// Try to find given xfid value in the styles vector and, if succeeded, set's the optional style.
119 /// </summary>
set_style_by_xfid(const std::vector<style_id_pair> & styles,std::size_t xfid,xlnt::optional<std::string> & style)120 void set_style_by_xfid(const std::vector<style_id_pair> &styles,
121     std::size_t xfid, xlnt::optional<std::string> &style)
122 {
123     for (auto &item : styles)
124     {
125         if (item.second == xfid)
126         {
127             style = item.first.name;
128         }
129     }
130 }
131 
// <sheetData> element
// Result of parse_sheet_data: everything collected from one <sheetData> element.
struct Sheet_Data
{
    // one entry per parsed <row>: the row's properties paired with a row number
    std::vector<std::pair<xlnt::row_properties, xlnt::row_t>> parsed_rows;
    // the cells of all rows in one flat list (parse_row appends to this vector)
    std::vector<xlnt::detail::Cell> parsed_cells;
};
138 
type_from_string(const std::string & str)139 xlnt::cell_type type_from_string(const std::string &str)
140 {
141     if (string_equal(str, "s"))
142     {
143         return xlnt::cell::type::shared_string;
144     }
145     else if (string_equal(str, "n"))
146     {
147         return xlnt::cell::type::number;
148     }
149     else if (string_equal(str, "b"))
150     {
151         return xlnt::cell::type::boolean;
152     }
153     else if (string_equal(str, "e"))
154     {
155         return xlnt::cell::type::error;
156     }
157     else if (string_equal(str, "inlineStr"))
158     {
159         return xlnt::cell::type::inline_string;
160     }
161     else if (string_equal(str, "str"))
162     {
163         return xlnt::cell::type::formula_string;
164     }
165     return xlnt::cell::type::shared_string;
166 }
167 
parse_cell(xlnt::row_t row_arg,xml::parser * parser)168 xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser)
169 {
170     xlnt::detail::Cell c;
171     for (auto &attr : parser->attribute_map())
172     {
173         if (string_equal(attr.first.name(), "r"))
174         {
175             c.ref = xlnt::detail::Cell_Reference(row_arg, attr.second.value);
176         }
177         else if (string_equal(attr.first.name(), "t"))
178         {
179             c.type = type_from_string(attr.second.value);
180         }
181         else if (string_equal(attr.first.name(), "s"))
182         {
183             c.style_index = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
184         }
185         else if (string_equal(attr.first.name(), "ph"))
186         {
187             c.is_phonetic = is_true(attr.second.value);
188         }
189         else if (string_equal(attr.first.name(), "cm"))
190         {
191             c.cell_metatdata_idx = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
192         }
193     }
194     int level = 1; // nesting level
195         // 1 == <c>
196         // 2 == <v>/<f>
197         // 3 == <is><t>
198         // exit loop at </c>
199     while (level > 0)
200     {
201         xml::parser::event_type e = parser->next();
202         switch (e)
203         {
204         case xml::parser::start_element: {
205             ++level;
206             break;
207         }
208         case xml::parser::end_element: {
209             --level;
210             break;
211         }
212         case xml::parser::characters: {
213             // only want the characters inside one of the nested tags
214             // without this a lot of formatting whitespace can get added
215             if (level == 2)
216             {
217                 // <v> -> numeric values
218                 if (string_equal(parser->name(), "v"))
219                 {
220                     c.value += std::move(parser->value());
221                 }
222                 // <f> formula
223                 else if (string_equal(parser->name(), "f"))
224                 {
225                     c.formula_string += std::move(parser->value());
226                 }
227             }
228             else if (level == 3)
229             {
230                 // <is><t> -> inline string
231                 if (string_equal(parser->name(), "t"))
232                 {
233                     c.value += std::move(parser->value());
234                 }
235             }
236             break;
237         }
238         case xml::parser::start_namespace_decl:
239         case xml::parser::end_namespace_decl:
240         case xml::parser::start_attribute:
241         case xml::parser::end_attribute:
242         case xml::parser::eof:
243         default: {
244             throw xlnt::exception("unexcpected XML parsing event");
245         }
246         }
247         // Prevents unhandled exceptions from being triggered.
248         parser->attribute_map();
249     }
250     return c;
251 }
252 
253 // <row> inside <sheetData> element
parse_row(xml::parser * parser,xlnt::detail::number_serialiser & converter,std::vector<xlnt::detail::Cell> & parsed_cells)254 std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail::number_serialiser &converter, std::vector<xlnt::detail::Cell> &parsed_cells)
255 {
256     std::pair<xlnt::row_properties, int> props;
257     for (auto &attr : parser->attribute_map())
258     {
259         if (string_equal(attr.first.name(), "dyDescent"))
260         {
261             props.first.dy_descent = converter.deserialise(attr.second.value);
262         }
263         else if (string_equal(attr.first.name(), "spans"))
264         {
265             props.first.spans = attr.second.value;
266         }
267         else if (string_equal(attr.first.name(), "ht"))
268         {
269             props.first.height = converter.deserialise(attr.second.value);
270         }
271         else if (string_equal(attr.first.name(), "s"))
272         {
273             props.first.style = strtoul(attr.second.value.c_str(), nullptr, 10);
274         }
275         else if (string_equal(attr.first.name(), "hidden"))
276         {
277             props.first.hidden = is_true(attr.second.value);
278         }
279         else if (string_equal(attr.first.name(), "customFormat"))
280         {
281             props.first.custom_format = is_true(attr.second.value);
282         }
283         else if (string_equal(attr.first.name(), "ph"))
284         {
285             is_true(attr.second.value);
286         }
287         else if (string_equal(attr.first.name(), "r"))
288         {
289             props.second = static_cast<int>(strtol(attr.second.value.c_str(), nullptr, 10));
290         }
291         else if (string_equal(attr.first.name(), "customHeight"))
292         {
293             props.first.custom_height = is_true(attr.second.value.c_str());
294         }
295     }
296 
297     int level = 1;
298     while (level > 0)
299     {
300         xml::parser::event_type e = parser->next();
301         switch (e)
302         {
303         case xml::parser::start_element: {
304             parsed_cells.push_back(parse_cell(static_cast<xlnt::row_t>(props.second), parser));
305             break;
306         }
307         case xml::parser::end_element: {
308             --level;
309             break;
310         }
311         case xml::parser::characters: {
312             // ignore whitespace
313             break;
314         }
315         case xml::parser::start_namespace_decl:
316         case xml::parser::start_attribute:
317         case xml::parser::end_namespace_decl:
318         case xml::parser::end_attribute:
319         case xml::parser::eof:
320         default: {
321             throw xlnt::exception("unexcpected XML parsing event");
322         }
323         }
324     }
325     return props;
326 }
327 
328 // <sheetData> inside <worksheet> element
parse_sheet_data(xml::parser * parser,xlnt::detail::number_serialiser & converter)329 Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser &converter)
330 {
331     Sheet_Data sheet_data;
332     int level = 1; // nesting level
333         // 1 == <sheetData>
334         // 2 == <row>
335 
336     while (level > 0)
337     {
338         xml::parser::event_type e = parser->next();
339         switch (e)
340         {
341         case xml::parser::start_element: {
342             sheet_data.parsed_rows.push_back(parse_row(parser, converter, sheet_data.parsed_cells));
343             break;
344         }
345         case xml::parser::end_element: {
346             --level;
347             break;
348         }
349         case xml::parser::characters: {
350             // ignore, whitespace formatting normally
351             break;
352         }
353         case xml::parser::start_namespace_decl:
354         case xml::parser::start_attribute:
355         case xml::parser::end_namespace_decl:
356         case xml::parser::end_attribute:
357         case xml::parser::eof:
358         default: {
359             throw xlnt::exception("unexcpected XML parsing event");
360         }
361         }
362     }
363     return sheet_data;
364 }
365 
366 } // namespace
367 
368 /*
369 class parsing_context
370 {
371 public:
372     parsing_context(xlnt::detail::zip_file_reader &archive, const std::string &filename)
373         : parser_(stream_, filename)
374     {
375     }
376 
377     xml::parser &parser();
378 
379 private:
380     std::istream stream_;
381     xml::parser parser_;
382 };
383 */
384 
385 namespace xlnt {
386 namespace detail {
387 
/// <summary>
/// Constructs a consumer bound to the given workbook. A reference to target is
/// stored in target_; parser_ starts out null.
/// </summary>
xlsx_consumer::xlsx_consumer(workbook &target)
    : target_(target),
      parser_(nullptr)
{
}
393 
~xlsx_consumer()394 xlsx_consumer::~xlsx_consumer()
395 {
396 }
397 
/// <summary>
/// Wraps source in a new izstream, stores it in archive_ and then calls
/// populate_workbook(false).
/// NOTE(review): the flag presumably selects non-streaming mode (open() passes
/// true) - confirm against populate_workbook.
/// </summary>
void xlsx_consumer::read(std::istream &source)
{
    archive_.reset(new izstream(source));
    populate_workbook(false);
}
403 
/// <summary>
/// Wraps source in a new izstream, stores it in archive_ and then calls
/// populate_workbook(true).
/// NOTE(review): the flag presumably selects streaming mode (read() passes
/// false) - confirm against populate_workbook.
/// </summary>
void xlsx_consumer::open(std::istream &source)
{
    archive_.reset(new izstream(source));
    populate_workbook(true);
}
409 
read_cell()410 cell xlsx_consumer::read_cell()
411 {
412     return cell(streaming_cell_.get());
413 }
414 
read_worksheet(const std::string & rel_id)415 void xlsx_consumer::read_worksheet(const std::string &rel_id)
416 {
417     read_worksheet_begin(rel_id);
418 
419     if (!streaming_)
420     {
421         read_worksheet_sheetdata();
422         read_worksheet_end(rel_id);
423     }
424 }
425 
/// <summary>
/// Reads the leading part of a worksheet: from the opening worksheet element up
/// to the start tag of sheetData. Sheet properties, sheet views, sheet format
/// properties and column properties found on the way are stored on the current
/// worksheet. Returns the title that the workbook's sheet_title_rel_id_map_
/// associates with rel_id. When sheetData is reached the function returns
/// immediately, leaving that element open for the caller to process.
/// </summary>
std::string xlsx_consumer::read_worksheet_begin(const std::string &rel_id)
{
    // in streaming mode, lazily allocate the cell_impl that read_cell() wraps
    if (streaming_ && streaming_cell_ == nullptr)
    {
        streaming_cell_.reset(new detail::cell_impl());
    }

    // find the title whose mapped relationship id equals rel_id
    // NOTE(review): the iterator returned by find_if is dereferenced without
    // comparing it against end(); a rel_id missing from the map would be
    // undefined behaviour - confirm callers guarantee it is present
    auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
        target_.d_->sheet_title_rel_id_map_.end(),
        [&](const std::pair<std::string, std::string> &p) {
            return p.second == rel_id;
        })->first;

    auto ws = worksheet(current_worksheet_);

    expect_start_element(qn("spreadsheetml", "worksheet"), xml::content::complex); // CT_Worksheet
    skip_attributes({qn("mc", "Ignorable")});

    // iterate over the children of <worksheet>; each branch handles one child element
    while (in_element(qn("spreadsheetml", "worksheet")))
    {
        auto current_worksheet_element = expect_start_element(xml::content::complex);

        if (current_worksheet_element == qn("spreadsheetml", "sheetPr")) // CT_SheetPr 0-1
        {
            // every attribute is optional: only those present in the XML are set on props
            sheet_pr props;
            if (parser().attribute_present("syncHorizontal"))
            { // optional, boolean, false
                props.sync_horizontal.set(parser().attribute<bool>("syncHorizontal"));
            }
            if (parser().attribute_present("syncVertical"))
            { // optional, boolean, false
                props.sync_vertical.set(parser().attribute<bool>("syncVertical"));
            }
            if (parser().attribute_present("syncRef"))
            { // optional, ST_Ref, false
                props.sync_ref.set(cell_reference(parser().attribute("syncRef")));
            }
            if (parser().attribute_present("transitionEvaluation"))
            { // optional, boolean, false
                props.transition_evaluation.set(parser().attribute<bool>("transitionEvaluation"));
            }
            if (parser().attribute_present("transitionEntry"))
            { // optional, boolean, false
                props.transition_entry.set(parser().attribute<bool>("transitionEntry"));
            }
            if (parser().attribute_present("published"))
            { // optional, boolean, true
                props.published.set(parser().attribute<bool>("published"));
            }
            if (parser().attribute_present("codeName"))
            { // optional, string
                props.code_name.set(parser().attribute<std::string>("codeName"));
            }
            if (parser().attribute_present("filterMode"))
            { // optional, boolean, false
                props.filter_mode.set(parser().attribute<bool>("filterMode"));
            }
            if (parser().attribute_present("enableFormatConditionsCalculation"))
            { // optional, boolean, true
                props.enable_format_condition_calculation.set(parser().attribute<bool>("enableFormatConditionsCalculation"));
            }
            ws.d_->sheet_properties_.set(props);
            // children of <sheetPr>: none of their content is stored on the worksheet here
            while (in_element(current_worksheet_element))
            {
                auto sheet_pr_child_element = expect_start_element(xml::content::simple);

                if (sheet_pr_child_element == qn("spreadsheetml", "tabColor")) // CT_Color 0-1
                {
                    // the colour is read, but its value is discarded
                    read_color();
                }
                else if (sheet_pr_child_element == qn("spreadsheetml", "outlinePr")) // CT_OutlinePr 0-1
                {
                    skip_attribute("applyStyles"); // optional, boolean, false
                    skip_attribute("summaryBelow"); // optional, boolean, true
                    skip_attribute("summaryRight"); // optional, boolean, true
                    skip_attribute("showOutlineSymbols"); // optional, boolean, true
                }
                else if (sheet_pr_child_element == qn("spreadsheetml", "pageSetUpPr")) // CT_PageSetUpPr 0-1
                {
                    skip_attribute("autoPageBreaks"); // optional, boolean, true
                    skip_attribute("fitToPage"); // optional, boolean, false
                }
                else
                {
                    unexpected_element(sheet_pr_child_element);
                }

                expect_end_element(sheet_pr_child_element);
            }
        }
        else if (current_worksheet_element == qn("spreadsheetml", "dimension")) // CT_SheetDimension 0-1
        {
            // the dimension element is not used; skip it entirely
            skip_remaining_content(current_worksheet_element);
        }
        else if (current_worksheet_element == qn("spreadsheetml", "sheetViews")) // CT_SheetViews 0-1
        {
            // each <sheetView> child becomes one sheet_view appended to the worksheet's views_
            while (in_element(current_worksheet_element))
            {
                expect_start_element(qn("spreadsheetml", "sheetView"), xml::content::complex); // CT_SheetView 1+

                sheet_view new_view;
                new_view.id(parser().attribute<std::size_t>("workbookViewId"));

                if (parser().attribute_present("showGridLines")) // default="true"
                {
                    new_view.show_grid_lines(is_true(parser().attribute("showGridLines")));
                }
                if (parser().attribute_present("topLeftCell"))
                {
                    new_view.top_left_cell(cell_reference(parser().attribute("topLeftCell")));
                }

                if (parser().attribute_present("defaultGridColor")) // default="true"
                {
                    new_view.default_grid_color(is_true(parser().attribute("defaultGridColor")));
                }

                // "normal" leaves the view type untouched; "pageBreakPreview" selects
                // page_break_preview and any other value selects page_layout
                if (parser().attribute_present("view")
                    && parser().attribute("view") != "normal")
                {
                    new_view.type(parser().attribute("view") == "pageBreakPreview"
                            ? sheet_view_type::page_break_preview
                            : sheet_view_type::page_layout);
                }

                // a selected tab makes this sheet the workbook view's active tab (id() - 1)
                if (parser().attribute_present("tabSelected")
                    && is_true(parser().attribute("tabSelected")))
                {
                    target_.d_->view_.get().active_tab = ws.id() - 1;
                }

                // NOTE(review): "view" and "topLeftCell" were already read above and are
                // listed again here - presumably harmless, confirm against skip_attributes
                skip_attributes({"windowProtection", "showFormulas", "showRowColHeaders", "showZeros", "rightToLeft", "showRuler", "showOutlineSymbols", "showWhiteSpace",
                    "view", "topLeftCell", "colorId", "zoomScale", "zoomScaleNormal", "zoomScaleSheetLayoutView",
                    "zoomScalePageLayoutView"});

                while (in_element(qn("spreadsheetml", "sheetView")))
                {
                    auto sheet_view_child_element = expect_start_element(xml::content::simple);

                    if (sheet_view_child_element == qn("spreadsheetml", "pane")) // CT_Pane 0-1
                    {
                        // only attributes present in the XML are copied onto the pane
                        pane new_pane;

                        if (parser().attribute_present("topLeftCell"))
                        {
                            new_pane.top_left_cell = cell_reference(parser().attribute("topLeftCell"));
                        }

                        if (parser().attribute_present("xSplit"))
                        {
                            new_pane.x_split = parser().attribute<column_t::index_t>("xSplit");
                        }

                        if (parser().attribute_present("ySplit"))
                        {
                            new_pane.y_split = parser().attribute<row_t>("ySplit");
                        }

                        if (parser().attribute_present("activePane"))
                        {
                            new_pane.active_pane = parser().attribute<pane_corner>("activePane");
                        }

                        if (parser().attribute_present("state"))
                        {
                            new_pane.state = parser().attribute<pane_state>("state");
                        }

                        new_view.pane(new_pane);
                    }
                    else if (sheet_view_child_element == qn("spreadsheetml", "selection")) // CT_Selection 0-4
                    {
                        selection current_selection;

                        if (parser().attribute_present("activeCell"))
                        {
                            current_selection.active_cell(parser().attribute("activeCell"));
                        }

                        if (parser().attribute_present("sqref"))
                        {
                            const auto sqref = range_reference(parser().attribute("sqref"));
                            current_selection.sqref(sqref);
                        }

                        if (parser().attribute_present("pane"))
                        {
                            current_selection.pane(parser().attribute<pane_corner>("pane"));
                        }

                        new_view.add_selection(current_selection);

                        skip_remaining_content(sheet_view_child_element);
                    }
                    else if (sheet_view_child_element == qn("spreadsheetml", "pivotSelection")) // CT_PivotSelection 0-4
                    {
                        skip_remaining_content(sheet_view_child_element);
                    }
                    else if (sheet_view_child_element == qn("spreadsheetml", "extLst")) // CT_ExtensionList 0-1
                    {
                        skip_remaining_content(sheet_view_child_element);
                    }
                    else
                    {
                        unexpected_element(sheet_view_child_element);
                    }

                    expect_end_element(sheet_view_child_element);
                }

                expect_end_element(qn("spreadsheetml", "sheetView"));

                ws.d_->views_.push_back(new_view);
            }
        }
        else if (current_worksheet_element == qn("spreadsheetml", "sheetFormatPr")) // CT_SheetFormatPr 0-1
        {
            // numeric attributes are converted with converter_; absent ones leave the field untouched
            if (parser().attribute_present("baseColWidth"))
            {
                ws.d_->format_properties_.base_col_width =
                    converter_.deserialise(parser().attribute("baseColWidth"));
            }
            if (parser().attribute_present("defaultColWidth"))
            {
                ws.d_->format_properties_.default_column_width =
                    converter_.deserialise(parser().attribute("defaultColWidth"));
            }
            if (parser().attribute_present("defaultRowHeight"))
            {
                ws.d_->format_properties_.default_row_height =
                    converter_.deserialise(parser().attribute("defaultRowHeight"));
            }

            // dyDescent is looked up under the "x14ac" namespace key
            if (parser().attribute_present(qn("x14ac", "dyDescent")))
            {
                ws.d_->format_properties_.dy_descent =
                    converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
            }

            skip_attributes();
        }
        else if (current_worksheet_element == qn("spreadsheetml", "cols")) // CT_Cols 0+
        {
            // each <col> describes an inclusive column range [min, max]; its
            // properties are applied to every column in that range
            while (in_element(qn("spreadsheetml", "cols")))
            {
                expect_start_element(qn("spreadsheetml", "col"), xml::content::simple);

                skip_attributes(std::vector<std::string>{"collapsed", "outlineLevel"});

                auto min = static_cast<column_t::index_t>(std::stoull(parser().attribute("min")));
                auto max = static_cast<column_t::index_t>(std::stoull(parser().attribute("max")));

                // avoid uninitialised warnings in GCC by using a lambda to make the conditional initialisation
                // NOTE(review): the (w * 7 - 5) / 7 adjustment presumably converts the
                // stored width to the library's internal width unit - rationale not visible here, confirm
                optional<double> width = [this](xml::parser &p) -> xlnt::optional<double> {
                    if (p.attribute_present("width"))
                    {
                        return (converter_.deserialise(p.attribute("width")) * 7 - 5) / 7;
                    }
                    return xlnt::optional<double>();
                }(parser());
                // avoid uninitialised warnings in GCC by using a lambda to make the conditional initialisation
                optional<std::size_t> column_style = [](xml::parser &p) -> xlnt::optional<std::size_t> {
                    if (p.attribute_present("style"))
                    {
                        return p.attribute<std::size_t>("style");
                    }
                    return xlnt::optional<std::size_t>();
                }(parser());

                // boolean attributes default to false when absent
                auto custom = parser().attribute_present("customWidth")
                    ? is_true(parser().attribute("customWidth"))
                    : false;
                auto hidden = parser().attribute_present("hidden")
                    ? is_true(parser().attribute("hidden"))
                    : false;
                auto best_fit = parser().attribute_present("bestFit")
                    ? is_true(parser().attribute("bestFit"))
                    : false;

                expect_end_element(qn("spreadsheetml", "col"));

                for (auto column = min; column <= max; column++)
                {
                    column_properties props;

                    if (width.is_set())
                    {
                        props.width = width.get();
                    }

                    if (column_style.is_set())
                    {
                        props.style = column_style.get();
                    }

                    props.hidden = hidden;
                    props.custom_width = custom;
                    props.best_fit = best_fit;
                    ws.add_column_properties(column, props);
                }
            }
        }
        else if (current_worksheet_element == qn("spreadsheetml", "sheetData")) // CT_SheetData 1
        {
            // stop here without consuming sheetData's end tag: the element is left
            // open so that its content can be read separately
            // (read_worksheet() continues with read_worksheet_sheetdata())
            return title;
        }

        // NOTE(review): there is no else branch - an element not matched above
        // reaches this point without its content having been skipped
        expect_end_element(current_worksheet_element);
    }

    return title;
}
738 
read_worksheet_sheetdata()739 void xlsx_consumer::read_worksheet_sheetdata()
740 {
741     if (stack_.back() != qn("spreadsheetml", "sheetData"))
742     {
743         return;
744     }
745     Sheet_Data ws_data = parse_sheet_data(parser_, converter_);
746     // NOTE: parse->construct are seperated here and could easily be threaded
747     // with a SPSC queue for what is likely to be an easy performance win
748     for (auto &row : ws_data.parsed_rows)
749     {
750         current_worksheet_->row_properties_.emplace(row.second, std::move(row.first));
751     }
752     auto impl = detail::cell_impl();
753     for (Cell &cell : ws_data.parsed_cells)
754     {
755         impl.parent_ = current_worksheet_;
756         impl.column_ = cell.ref.column;
757         impl.row_ = cell.ref.row;
758         detail::cell_impl *ws_cell_impl = &current_worksheet_->cell_map_.emplace(cell_reference(impl.column_, impl.row_), std::move(impl)).first->second;
759         if (cell.style_index != -1)
760         {
761             ws_cell_impl->format_ = target_.format(static_cast<size_t>(cell.style_index)).d_;
762         }
763         if (cell.cell_metatdata_idx != -1)
764         {
765         }
766         ws_cell_impl->phonetics_visible_ = cell.is_phonetic;
767         if (!cell.formula_string.empty())
768         {
769             ws_cell_impl->formula_ = cell.formula_string[0] == '=' ? cell.formula_string.substr(1) : std::move(cell.formula_string);
770         }
771         if (!cell.value.empty())
772         {
773             ws_cell_impl->type_ = cell.type;
774             switch (cell.type)
775             {
776             case cell::type::boolean: {
777                 ws_cell_impl->value_numeric_ = is_true(cell.value) ? 1.0 : 0.0;
778                 break;
779             }
780             case cell::type::empty:
781             case cell::type::number:
782             case cell::type::date: {
783                 ws_cell_impl->value_numeric_ = converter_.deserialise(cell.value);
784                 break;
785             }
786             case cell::type::shared_string: {
787                 ws_cell_impl->value_numeric_ = static_cast<double>(strtol(cell.value.c_str(), nullptr, 10));
788                 break;
789             }
790             case cell::type::inline_string: {
791                 ws_cell_impl->value_text_ = std::move(cell.value);
792                 break;
793             }
794             case cell::type::formula_string: {
795                 ws_cell_impl->value_text_ = std::move(cell.value);
796                 break;
797             }
798             case cell::type::error: {
799                 ws_cell_impl->value_text_.plain_text(cell.value, false);
800                 break;
801             }
802             }
803         }
804     }
805     stack_.pop_back();
806 }
807 
read_worksheet_end(const std::string & rel_id)808 worksheet xlsx_consumer::read_worksheet_end(const std::string &rel_id)
809 {
810     auto &manifest = target_.manifest();
811 
812     const auto workbook_rel = manifest.relationship(path("/"), relationship_type::office_document);
813     const auto sheet_rel = manifest.relationship(workbook_rel.target().path(), rel_id);
814     path sheet_path(sheet_rel.source().path().parent().append(sheet_rel.target().path()));
815     auto hyperlinks = manifest.relationships(sheet_path, xlnt::relationship_type::hyperlink);
816 
817     auto ws = worksheet(current_worksheet_);
818 
819     while (in_element(qn("spreadsheetml", "worksheet")))
820     {
821         auto current_worksheet_element = expect_start_element(xml::content::complex);
822 
823         if (current_worksheet_element == qn("spreadsheetml", "sheetCalcPr")) // CT_SheetCalcPr 0-1
824         {
825             skip_remaining_content(current_worksheet_element);
826         }
827         else if (current_worksheet_element == qn("spreadsheetml", "sheetProtection")) // CT_SheetProtection 0-1
828         {
829             skip_remaining_content(current_worksheet_element);
830         }
831         else if (current_worksheet_element == qn("spreadsheetml", "protectedRanges")) // CT_ProtectedRanges 0-1
832         {
833             skip_remaining_content(current_worksheet_element);
834         }
835         else if (current_worksheet_element == qn("spreadsheetml", "scenarios")) // CT_Scenarios 0-1
836         {
837             skip_remaining_content(current_worksheet_element);
838         }
839         else if (current_worksheet_element == qn("spreadsheetml", "autoFilter")) // CT_AutoFilter 0-1
840         {
841             ws.auto_filter(xlnt::range_reference(parser().attribute("ref")));
842             // auto filter complex
843             skip_remaining_content(current_worksheet_element);
844         }
845         else if (current_worksheet_element == qn("spreadsheetml", "sortState")) // CT_SortState 0-1
846         {
847             skip_remaining_content(current_worksheet_element);
848         }
849         else if (current_worksheet_element == qn("spreadsheetml", "dataConsolidate")) // CT_DataConsolidate 0-1
850         {
851             skip_remaining_content(current_worksheet_element);
852         }
853         else if (current_worksheet_element == qn("spreadsheetml", "customSheetViews")) // CT_CustomSheetViews 0-1
854         {
855             skip_remaining_content(current_worksheet_element);
856         }
857         else if (current_worksheet_element == qn("spreadsheetml", "mergeCells")) // CT_MergeCells 0-1
858         {
859             auto count = std::stoull(parser().attribute("count"));
860 
861             while (in_element(qn("spreadsheetml", "mergeCells")))
862             {
863                 expect_start_element(qn("spreadsheetml", "mergeCell"), xml::content::simple);
864                 ws.merge_cells(range_reference(parser().attribute("ref")));
865                 expect_end_element(qn("spreadsheetml", "mergeCell"));
866 
867                 count--;
868             }
869 
870             if (count != 0)
871             {
872                 throw invalid_file("sizes don't match");
873             }
874         }
875         else if (current_worksheet_element == qn("spreadsheetml", "phoneticPr")) // CT_PhoneticPr 0-1
876         {
877             phonetic_pr phonetic_properties(parser().attribute<std::uint32_t>("fontId"));
878             if (parser().attribute_present("type"))
879             {
880                 phonetic_properties.type(phonetic_pr::type_from_string(parser().attribute("type")));
881             }
882             if (parser().attribute_present("alignment"))
883             {
884                 phonetic_properties.alignment(phonetic_pr::alignment_from_string(parser().attribute("alignment")));
885             }
886             current_worksheet_->phonetic_properties_.set(phonetic_properties);
887         }
888         else if (current_worksheet_element == qn("spreadsheetml", "conditionalFormatting")) // CT_ConditionalFormatting 0+
889         {
890             skip_remaining_content(current_worksheet_element);
891         }
892         else if (current_worksheet_element == qn("spreadsheetml", "dataValidations")) // CT_DataValidations 0-1
893         {
894             skip_remaining_content(current_worksheet_element);
895         }
896         else if (current_worksheet_element == qn("spreadsheetml", "hyperlinks")) // CT_Hyperlinks 0-1
897         {
898             while (in_element(current_worksheet_element))
899             {
900                 // CT_Hyperlink
901                 expect_start_element(qn("spreadsheetml", "hyperlink"), xml::content::simple);
902 
903                 auto cell = ws.cell(parser().attribute("ref"));
904 
905                 if (parser().attribute_present(qn("r", "id")))
906                 {
907                     auto hyperlink_rel_id = parser().attribute(qn("r", "id"));
908                     auto hyperlink_rel = std::find_if(hyperlinks.begin(), hyperlinks.end(),
909                         [&](const relationship &r) { return r.id() == hyperlink_rel_id; });
910 
911                     if (hyperlink_rel != hyperlinks.end())
912                     {
913                         auto url = hyperlink_rel->target().path().string();
914 
915                         if (cell.has_value())
916                         {
917                             cell.hyperlink(url, cell.value<std::string>());
918                         }
919                         else
920                         {
921                             cell.hyperlink(url);
922                         }
923                     }
924                 }
925                 else if (parser().attribute_present("location"))
926                 {
927                     auto hyperlink = hyperlink_impl();
928 
929                     auto location = parser().attribute("location");
930                     hyperlink.relationship = relationship("", relationship_type::hyperlink,
931                         uri(""), uri(location), target_mode::internal);
932 
933                     if (parser().attribute_present("display"))
934                     {
935                         hyperlink.display = parser().attribute("display");
936                     }
937 
938                     if (parser().attribute_present("tooltip"))
939                     {
940                         hyperlink.tooltip = parser().attribute("tooltip");
941                     }
942 
943                     cell.d_->hyperlink_ = hyperlink;
944                 }
945 
946                 expect_end_element(qn("spreadsheetml", "hyperlink"));
947             }
948         }
949         else if (current_worksheet_element == qn("spreadsheetml", "printOptions")) // CT_PrintOptions 0-1
950         {
951             print_options opts;
952             if (parser().attribute_present("gridLines"))
953             {
954                 opts.print_grid_lines.set(parser().attribute<bool>("gridLines"));
955             }
956             if (parser().attribute_present("gridLinesSet"))
957             {
958                 opts.print_grid_lines.set(parser().attribute<bool>("gridLinesSet"));
959             }
960             if (parser().attribute_present("headings"))
961             {
962                 opts.print_grid_lines.set(parser().attribute<bool>("headings"));
963             }
964             if (parser().attribute_present("horizontalCentered"))
965             {
966                 opts.print_grid_lines.set(parser().attribute<bool>("horizontalCentered"));
967             }
968             if (parser().attribute_present("verticalCentered"))
969             {
970                 opts.print_grid_lines.set(parser().attribute<bool>("verticalCentered"));
971             }
972             ws.d_->print_options_.set(opts);
973             skip_remaining_content(current_worksheet_element);
974         }
975         else if (current_worksheet_element == qn("spreadsheetml", "pageMargins")) // CT_PageMargins 0-1
976         {
977             page_margins margins;
978 
979             margins.top(converter_.deserialise(parser().attribute("top")));
980             margins.bottom(converter_.deserialise(parser().attribute("bottom")));
981             margins.left(converter_.deserialise(parser().attribute("left")));
982             margins.right(converter_.deserialise(parser().attribute("right")));
983             margins.header(converter_.deserialise(parser().attribute("header")));
984             margins.footer(converter_.deserialise(parser().attribute("footer")));
985 
986             ws.page_margins(margins);
987         }
988         else if (current_worksheet_element == qn("spreadsheetml", "pageSetup")) // CT_PageSetup 0-1
989         {
990             page_setup setup;
991             if (parser().attribute_present("orientation"))
992             {
993                 setup.orientation_.set(parser().attribute<orientation>("orientation"));
994             }
995             if (parser().attribute_present("horizontalDpi"))
996             {
997                 setup.horizontal_dpi_.set(parser().attribute<std::size_t>("horizontalDpi"));
998             }
999             if (parser().attribute_present("verticalDpi"))
1000             {
1001                 setup.vertical_dpi_.set(parser().attribute<std::size_t>("verticalDpi"));
1002             }
1003             ws.page_setup(setup);
1004             skip_remaining_content(current_worksheet_element);
1005         }
1006         else if (current_worksheet_element == qn("spreadsheetml", "headerFooter")) // CT_HeaderFooter 0-1
1007         {
1008             header_footer hf;
1009 
1010             hf.align_with_margins(!parser().attribute_present("alignWithMargins")
1011                 || is_true(parser().attribute("alignWithMargins")));
1012             hf.scale_with_doc(!parser().attribute_present("alignWithMargins")
1013                 || is_true(parser().attribute("alignWithMargins")));
1014             auto different_odd_even = parser().attribute_present("differentOddEven")
1015                 && is_true(parser().attribute("differentOddEven"));
1016             auto different_first = parser().attribute_present("differentFirst")
1017                 && is_true(parser().attribute("differentFirst"));
1018 
1019             optional<std::array<optional<rich_text>, 3>> odd_header;
1020             optional<std::array<optional<rich_text>, 3>> odd_footer;
1021             optional<std::array<optional<rich_text>, 3>> even_header;
1022             optional<std::array<optional<rich_text>, 3>> even_footer;
1023             optional<std::array<optional<rich_text>, 3>> first_header;
1024             optional<std::array<optional<rich_text>, 3>> first_footer;
1025 
1026             using xlnt::detail::decode_header_footer;
1027 
1028             while (in_element(current_worksheet_element))
1029             {
1030                 auto current_hf_element = expect_start_element(xml::content::simple);
1031 
1032                 if (current_hf_element == qn("spreadsheetml", "oddHeader"))
1033                 {
1034                     odd_header = decode_header_footer(read_text(), converter_);
1035                 }
1036                 else if (current_hf_element == qn("spreadsheetml", "oddFooter"))
1037                 {
1038                     odd_footer = decode_header_footer(read_text(), converter_);
1039                 }
1040                 else if (current_hf_element == qn("spreadsheetml", "evenHeader"))
1041                 {
1042                     even_header = decode_header_footer(read_text(), converter_);
1043                 }
1044                 else if (current_hf_element == qn("spreadsheetml", "evenFooter"))
1045                 {
1046                     even_footer = decode_header_footer(read_text(), converter_);
1047                 }
1048                 else if (current_hf_element == qn("spreadsheetml", "firstHeader"))
1049                 {
1050                     first_header = decode_header_footer(read_text(), converter_);
1051                 }
1052                 else if (current_hf_element == qn("spreadsheetml", "firstFooter"))
1053                 {
1054                     first_footer = decode_header_footer(read_text(), converter_);
1055                 }
1056                 else
1057                 {
1058                     unexpected_element(current_hf_element);
1059                 }
1060 
1061                 expect_end_element(current_hf_element);
1062             }
1063 
1064             for (std::size_t i = 0; i < 3; ++i)
1065             {
1066                 auto loc = i == 0 ? header_footer::location::left
1067                                   : i == 1 ? header_footer::location::center : header_footer::location::right;
1068 
1069                 if (different_odd_even)
1070                 {
1071                     if (odd_header.is_set()
1072                         && odd_header.get().at(i).is_set()
1073                         && even_header.is_set()
1074                         && even_header.get().at(i).is_set())
1075                     {
1076                         hf.odd_even_header(loc, odd_header.get().at(i).get(), even_header.get().at(i).get());
1077                     }
1078 
1079                     if (odd_footer.is_set()
1080                         && odd_footer.get().at(i).is_set()
1081                         && even_footer.is_set()
1082                         && even_footer.get().at(i).is_set())
1083                     {
1084                         hf.odd_even_footer(loc, odd_footer.get().at(i).get(), even_footer.get().at(i).get());
1085                     }
1086                 }
1087                 else
1088                 {
1089                     if (odd_header.is_set() && odd_header.get().at(i).is_set())
1090                     {
1091                         hf.header(loc, odd_header.get().at(i).get());
1092                     }
1093 
1094                     if (odd_footer.is_set() && odd_footer.get().at(i).is_set())
1095                     {
1096                         hf.footer(loc, odd_footer.get().at(i).get());
1097                     }
1098                 }
1099 
1100                 if (different_first)
1101                 {
1102                 }
1103             }
1104 
1105             ws.header_footer(hf);
1106         }
1107         else if (current_worksheet_element == qn("spreadsheetml", "rowBreaks")) // CT_PageBreak 0-1
1108         {
1109             auto count = parser().attribute_present("count") ? parser().attribute<std::size_t>("count") : 0;
1110             auto manual_break_count = parser().attribute_present("manualBreakCount")
1111                 ? parser().attribute<std::size_t>("manualBreakCount")
1112                 : 0;
1113 
1114             while (in_element(qn("spreadsheetml", "rowBreaks")))
1115             {
1116                 expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
1117 
1118                 if (parser().attribute_present("id"))
1119                 {
1120                     ws.page_break_at_row(parser().attribute<row_t>("id"));
1121                     --count;
1122                 }
1123 
1124                 if (parser().attribute_present("man") && is_true(parser().attribute("man")))
1125                 {
1126                     --manual_break_count;
1127                 }
1128 
1129                 skip_attributes({"min", "max", "pt"});
1130                 expect_end_element(qn("spreadsheetml", "brk"));
1131             }
1132         }
1133         else if (current_worksheet_element == qn("spreadsheetml", "colBreaks")) // CT_PageBreak 0-1
1134         {
1135             auto count = parser().attribute_present("count") ? parser().attribute<std::size_t>("count") : 0;
1136             auto manual_break_count = parser().attribute_present("manualBreakCount")
1137                 ? parser().attribute<std::size_t>("manualBreakCount")
1138                 : 0;
1139 
1140             while (in_element(qn("spreadsheetml", "colBreaks")))
1141             {
1142                 expect_start_element(qn("spreadsheetml", "brk"), xml::content::simple);
1143 
1144                 if (parser().attribute_present("id"))
1145                 {
1146                     ws.page_break_at_column(parser().attribute<column_t::index_t>("id"));
1147                     --count;
1148                 }
1149 
1150                 if (parser().attribute_present("man") && is_true(parser().attribute("man")))
1151                 {
1152                     --manual_break_count;
1153                 }
1154 
1155                 skip_attributes({"min", "max", "pt"});
1156                 expect_end_element(qn("spreadsheetml", "brk"));
1157             }
1158         }
1159         else if (current_worksheet_element == qn("spreadsheetml", "customProperties")) // CT_CustomProperties 0-1
1160         {
1161             skip_remaining_content(current_worksheet_element);
1162         }
1163         else if (current_worksheet_element == qn("spreadsheetml", "cellWatches")) // CT_CellWatches 0-1
1164         {
1165             skip_remaining_content(current_worksheet_element);
1166         }
1167         else if (current_worksheet_element == qn("spreadsheetml", "ignoredErrors")) // CT_IgnoredErrors 0-1
1168         {
1169             skip_remaining_content(current_worksheet_element);
1170         }
1171         else if (current_worksheet_element == qn("spreadsheetml", "smartTags")) // CT_SmartTags 0-1
1172         {
1173             skip_remaining_content(current_worksheet_element);
1174         }
1175         else if (current_worksheet_element == qn("spreadsheetml", "drawing")) // CT_Drawing 0-1
1176         {
1177             if (parser().attribute_present(qn("r", "id")))
1178             {
1179                 auto drawing_rel_id = parser().attribute(qn("r", "id"));
1180                 ws.d_->drawing_rel_id_ = drawing_rel_id;
1181             }
1182         }
1183         else if (current_worksheet_element == qn("spreadsheetml", "legacyDrawing"))
1184         {
1185             skip_remaining_content(current_worksheet_element);
1186         }
1187         else if (current_worksheet_element == qn("spreadsheetml", "extLst"))
1188         {
1189             ext_list extensions(parser(), current_worksheet_element.namespace_());
1190             ws.d_->extension_list_.set(extensions);
1191         }
1192         else
1193         {
1194             unexpected_element(current_worksheet_element);
1195         }
1196 
1197         expect_end_element(current_worksheet_element);
1198     }
1199 
1200     expect_end_element(qn("spreadsheetml", "worksheet"));
1201 
1202     if (manifest.has_relationship(sheet_path, xlnt::relationship_type::comments))
1203     {
1204         auto comments_part = manifest.canonicalize({workbook_rel, sheet_rel,
1205             manifest.relationship(sheet_path, xlnt::relationship_type::comments)});
1206 
1207         auto receive = xml::parser::receive_default;
1208         auto comments_part_streambuf = archive_->open(comments_part);
1209         std::istream comments_part_stream(comments_part_streambuf.get());
1210         xml::parser parser(comments_part_stream, comments_part.string(), receive);
1211         parser_ = &parser;
1212 
1213         read_comments(ws);
1214 
1215         if (manifest.has_relationship(sheet_path, xlnt::relationship_type::vml_drawing))
1216         {
1217             auto vml_drawings_part = manifest.canonicalize({workbook_rel, sheet_rel,
1218                 manifest.relationship(sheet_path, xlnt::relationship_type::vml_drawing)});
1219 
1220             auto vml_drawings_part_streambuf = archive_->open(comments_part);
1221             std::istream vml_drawings_part_stream(comments_part_streambuf.get());
1222             xml::parser vml_parser(vml_drawings_part_stream, vml_drawings_part.string(), receive);
1223             parser_ = &vml_parser;
1224 
1225             read_vml_drawings(ws);
1226         }
1227     }
1228 
1229     if (manifest.has_relationship(sheet_path, xlnt::relationship_type::drawings))
1230     {
1231         auto drawings_part = manifest.canonicalize({workbook_rel, sheet_rel,
1232             manifest.relationship(sheet_path, xlnt::relationship_type::drawings)});
1233 
1234         auto receive = xml::parser::receive_default;
1235         auto drawings_part_streambuf = archive_->open(drawings_part);
1236         std::istream drawings_part_stream(drawings_part_streambuf.get());
1237         xml::parser parser(drawings_part_stream, drawings_part.string(), receive);
1238         parser_ = &parser;
1239 
1240         read_drawings(ws, drawings_part);
1241     }
1242 
1243     return ws;
1244 }
1245 
parser()1246 xml::parser &xlsx_consumer::parser()
1247 {
1248     return *parser_;
1249 }
1250 
has_cell()1251 bool xlsx_consumer::has_cell()
1252 {
1253     auto ws = worksheet(current_worksheet_);
1254 
1255     while (streaming_cell_ // we're not at the end of the file
1256            && !in_element(qn("spreadsheetml", "row"))) // we're at the end of a row, or between rows
1257     {
1258         if (parser().peek() == xml::parser::event_type::end_element
1259             && stack_.back() == qn("spreadsheetml", "row"))
1260         {
1261             // We're at the end of a row.
1262             expect_end_element(qn("spreadsheetml", "row"));
1263             // ... and keep parsing.
1264         }
1265 
1266         if (parser().peek() == xml::parser::event_type::end_element
1267             && stack_.back() == qn("spreadsheetml", "sheetData"))
1268         {
1269             // End of sheet. Mark it by setting streaming_cell_ to nullptr, so we never get here again.
1270             expect_end_element(qn("spreadsheetml", "sheetData"));
1271             streaming_cell_.reset(nullptr);
1272             break;
1273         }
1274 
1275         expect_start_element(qn("spreadsheetml", "row"), xml::content::complex); // CT_Row
1276         auto row_index = static_cast<row_t>(std::stoul(parser().attribute("r")));
1277         auto &row_properties = ws.row_properties(row_index);
1278 
1279         if (parser().attribute_present("ht"))
1280         {
1281             row_properties.height = converter_.deserialise(parser().attribute("ht"));
1282         }
1283 
1284         if (parser().attribute_present("customHeight"))
1285         {
1286             row_properties.custom_height = is_true(parser().attribute("customHeight"));
1287         }
1288 
1289         if (parser().attribute_present("hidden") && is_true(parser().attribute("hidden")))
1290         {
1291             row_properties.hidden = true;
1292         }
1293 
1294         if (parser().attribute_present(qn("x14ac", "dyDescent")))
1295         {
1296             row_properties.dy_descent = converter_.deserialise(parser().attribute(qn("x14ac", "dyDescent")));
1297         }
1298 
1299         if (parser().attribute_present("spans"))
1300         {
1301             row_properties.spans = parser().attribute("spans");
1302         }
1303 
1304         skip_attributes({"customFormat", "s", "customFont",
1305             "outlineLevel", "collapsed", "thickTop", "thickBot",
1306             "ph"});
1307     }
1308 
1309     if (!streaming_cell_)
1310     {
1311         // We're at the end of the worksheet
1312         return false;
1313     }
1314 
1315     expect_start_element(qn("spreadsheetml", "c"), xml::content::complex);
1316 
1317     assert(streaming_);
1318     auto cell = xlnt::cell(streaming_cell_.get());
1319     auto reference = cell_reference(parser().attribute("r"));
1320     cell.d_->parent_ = current_worksheet_;
1321     cell.d_->column_ = reference.column_index();
1322     cell.d_->row_ = reference.row();
1323 
1324     if (parser().attribute_present("ph"))
1325     {
1326         cell.d_->phonetics_visible_ = parser().attribute<bool>("ph");
1327     }
1328 
1329     auto has_type = parser().attribute_present("t");
1330     auto type = has_type ? parser().attribute("t") : "n";
1331 
1332     if (parser().attribute_present("s"))
1333     {
1334         cell.format(target_.format(static_cast<std::size_t>(std::stoull(parser().attribute("s")))));
1335     }
1336 
1337     auto has_value = false;
1338     auto value_string = std::string();
1339 
1340     auto has_formula = false;
1341     auto has_shared_formula = false;
1342     auto formula_value_string = std::string();
1343 
1344     while (in_element(qn("spreadsheetml", "c")))
1345     {
1346         auto current_element = expect_start_element(xml::content::mixed);
1347 
1348         if (current_element == qn("spreadsheetml", "v")) // s:ST_Xstring
1349         {
1350             has_value = true;
1351             value_string = read_text();
1352         }
1353         else if (current_element == qn("spreadsheetml", "f")) // CT_CellFormula
1354         {
1355             has_formula = true;
1356 
1357             if (parser().attribute_present("t"))
1358             {
1359                 has_shared_formula = parser().attribute("t") == "shared";
1360             }
1361 
1362             skip_attributes({"aca", "ref", "dt2D", "dtr", "del1",
1363                 "del2", "r1", "r2", "ca", "si", "bx"});
1364 
1365             formula_value_string = read_text();
1366         }
1367         else if (current_element == qn("spreadsheetml", "is")) // CT_Rst
1368         {
1369             expect_start_element(qn("spreadsheetml", "t"), xml::content::simple);
1370             has_value = true;
1371             value_string = read_text();
1372             expect_end_element(qn("spreadsheetml", "t"));
1373         }
1374         else
1375         {
1376             unexpected_element(current_element);
1377         }
1378 
1379         expect_end_element(current_element);
1380     }
1381 
1382     expect_end_element(qn("spreadsheetml", "c"));
1383 
1384     if (has_formula && !has_shared_formula)
1385     {
1386         cell.formula(formula_value_string);
1387     }
1388 
1389     if (has_value)
1390     {
1391         if (type == "str")
1392         {
1393             cell.d_->value_text_ = value_string;
1394             cell.data_type(cell::type::formula_string);
1395         }
1396         else if (type == "inlineStr")
1397         {
1398             cell.d_->value_text_ = value_string;
1399             cell.data_type(cell::type::inline_string);
1400         }
1401         else if (type == "s")
1402         {
1403             cell.d_->value_numeric_ = converter_.deserialise(value_string);
1404             cell.data_type(cell::type::shared_string);
1405         }
1406         else if (type == "b") // boolean
1407         {
1408             cell.value(is_true(value_string));
1409         }
1410         else if (type == "n") // numeric
1411         {
1412             cell.value(converter_.deserialise(value_string));
1413         }
1414         else if (!value_string.empty() && value_string[0] == '#')
1415         {
1416             cell.error(value_string);
1417         }
1418     }
1419 
1420     return true;
1421 }
1422 
read_relationships(const path & part)1423 std::vector<relationship> xlsx_consumer::read_relationships(const path &part)
1424 {
1425     const auto part_rels_path = part.parent().append("_rels").append(part.filename() + ".rels").relative_to(path("/"));
1426 
1427     std::vector<xlnt::relationship> relationships;
1428     if (!archive_->has_file(part_rels_path)) return relationships;
1429 
1430     auto rels_streambuf = archive_->open(part_rels_path);
1431     std::istream rels_stream(rels_streambuf.get());
1432     xml::parser parser(rels_stream, part_rels_path.string());
1433     parser_ = &parser;
1434 
1435     expect_start_element(qn("relationships", "Relationships"), xml::content::complex);
1436 
1437     while (in_element(qn("relationships", "Relationships")))
1438     {
1439         expect_start_element(qn("relationships", "Relationship"), xml::content::simple);
1440 
1441         const auto target_mode = parser.attribute_present("TargetMode")
1442             ? parser.attribute<xlnt::target_mode>("TargetMode")
1443             : xlnt::target_mode::internal;
1444         auto target = xlnt::uri(parser.attribute("Target"));
1445 
1446         if (target.path().is_absolute() && target_mode == xlnt::target_mode::internal)
1447         {
1448             target = uri(target.path().relative_to(path(part.string()).resolve(path("/"))).string());
1449         }
1450 
1451         relationships.emplace_back(parser.attribute("Id"),
1452             parser.attribute<xlnt::relationship_type>("Type"),
1453             xlnt::uri(part.string()), target, target_mode);
1454 
1455         expect_end_element(qn("relationships", "Relationship"));
1456     }
1457 
1458     expect_end_element(qn("relationships", "Relationships"));
1459     parser_ = nullptr;
1460 
1461     return relationships;
1462 }
1463 
read_part(const std::vector<relationship> & rel_chain)1464 void xlsx_consumer::read_part(const std::vector<relationship> &rel_chain)
1465 {
1466     const auto &manifest = target_.manifest();
1467     const auto part_path = manifest.canonicalize(rel_chain);
1468     auto part_streambuf = archive_->open(part_path);
1469     std::istream part_stream(part_streambuf.get());
1470     xml::parser parser(part_stream, part_path.string());
1471     parser_ = &parser;
1472 
1473     switch (rel_chain.back().type())
1474     {
1475     case relationship_type::core_properties:
1476         read_core_properties();
1477         break;
1478 
1479     case relationship_type::extended_properties:
1480         read_extended_properties();
1481         break;
1482 
1483     case relationship_type::custom_properties:
1484         read_custom_properties();
1485         break;
1486 
1487     case relationship_type::office_document:
1488         read_office_document(manifest.content_type(part_path));
1489         break;
1490 
1491     case relationship_type::connections:
1492         read_connections();
1493         break;
1494 
1495     case relationship_type::custom_xml_mappings:
1496         read_custom_xml_mappings();
1497         break;
1498 
1499     case relationship_type::external_workbook_references:
1500         read_external_workbook_references();
1501         break;
1502 
1503     case relationship_type::pivot_table:
1504         read_pivot_table();
1505         break;
1506 
1507     case relationship_type::shared_workbook_revision_headers:
1508         read_shared_workbook_revision_headers();
1509         break;
1510 
1511     case relationship_type::volatile_dependencies:
1512         read_volatile_dependencies();
1513         break;
1514 
1515     case relationship_type::shared_string_table:
1516         read_shared_string_table();
1517         break;
1518 
1519     case relationship_type::stylesheet:
1520         read_stylesheet();
1521         break;
1522 
1523     case relationship_type::theme:
1524         read_theme();
1525         break;
1526 
1527     case relationship_type::chartsheet:
1528         read_chartsheet(rel_chain.back().id());
1529         break;
1530 
1531     case relationship_type::dialogsheet:
1532         read_dialogsheet(rel_chain.back().id());
1533         break;
1534 
1535     case relationship_type::worksheet:
1536         read_worksheet(rel_chain.back().id());
1537         break;
1538 
1539     case relationship_type::thumbnail:
1540         read_image(part_path);
1541         break;
1542 
1543     case relationship_type::calculation_chain:
1544         read_calculation_chain();
1545         break;
1546 
1547     case relationship_type::hyperlink:
1548         break;
1549 
1550     case relationship_type::comments:
1551         break;
1552 
1553     case relationship_type::vml_drawing:
1554         break;
1555 
1556     case relationship_type::unknown:
1557         break;
1558 
1559     case relationship_type::printer_settings:
1560         break;
1561 
1562     case relationship_type::custom_property:
1563         break;
1564 
1565     case relationship_type::drawings:
1566         break;
1567 
1568     case relationship_type::pivot_table_cache_definition:
1569         break;
1570 
1571     case relationship_type::pivot_table_cache_records:
1572         break;
1573 
1574     case relationship_type::query_table:
1575         break;
1576 
1577     case relationship_type::shared_workbook:
1578         break;
1579 
1580     case relationship_type::revision_log:
1581         break;
1582 
1583     case relationship_type::shared_workbook_user_data:
1584         break;
1585 
1586     case relationship_type::single_cell_table_definitions:
1587         break;
1588 
1589     case relationship_type::table_definition:
1590         break;
1591 
1592     case relationship_type::image:
1593         read_image(part_path);
1594         break;
1595     }
1596 
1597     parser_ = nullptr;
1598 }
1599 
populate_workbook(bool streaming)1600 void xlsx_consumer::populate_workbook(bool streaming)
1601 {
1602     streaming_ = streaming;
1603 
1604     target_.clear();
1605 
1606     read_content_types();
1607     const auto root_path = path("/");
1608 
1609     for (const auto &package_rel : read_relationships(root_path))
1610     {
1611         manifest().register_relationship(package_rel);
1612     }
1613 
1614     for (auto package_rel : manifest().relationships(root_path))
1615     {
1616         if (package_rel.type() == relationship_type::office_document)
1617         {
1618             // Read the workbook after all the other package parts
1619             continue;
1620         }
1621 
1622         read_part({package_rel});
1623     }
1624 
1625     for (const auto &relationship_source_string : archive_->files())
1626     {
1627         for (const auto &part_rel : read_relationships(path(relationship_source_string)))
1628         {
1629             manifest().register_relationship(part_rel);
1630         }
1631     }
1632 
1633     read_part({manifest().relationship(root_path,
1634         relationship_type::office_document)});
1635 }
1636 
1637 // Package Parts
1638 
read_content_types()1639 void xlsx_consumer::read_content_types()
1640 {
1641     auto &manifest = target_.manifest();
1642     auto content_types_streambuf = archive_->open(path("[Content_Types].xml"));
1643     std::istream content_types_stream(content_types_streambuf.get());
1644     xml::parser parser(content_types_stream, "[Content_Types].xml");
1645     parser_ = &parser;
1646 
1647     expect_start_element(qn("content-types", "Types"), xml::content::complex);
1648 
1649     while (in_element(qn("content-types", "Types")))
1650     {
1651         auto current_element = expect_start_element(xml::content::complex);
1652 
1653         if (current_element == qn("content-types", "Default"))
1654         {
1655             auto extension = parser.attribute("Extension");
1656             auto content_type = parser.attribute("ContentType");
1657             manifest.register_default_type(extension, content_type);
1658         }
1659         else if (current_element == qn("content-types", "Override"))
1660         {
1661             auto part_name = parser.attribute("PartName");
1662             auto content_type = parser.attribute("ContentType");
1663             manifest.register_override_type(path(part_name), content_type);
1664         }
1665         else
1666         {
1667             unexpected_element(current_element);
1668         }
1669 
1670         expect_end_element(current_element);
1671     }
1672 
1673     expect_end_element(qn("content-types", "Types"));
1674 }
1675 
read_core_properties()1676 void xlsx_consumer::read_core_properties()
1677 {
1678     //qn("extended-properties", "Properties");
1679     //qn("custom-properties", "Properties");
1680     expect_start_element(qn("core-properties", "coreProperties"), xml::content::complex);
1681 
1682     while (in_element(qn("core-properties", "coreProperties")))
1683     {
1684         const auto property_element = expect_start_element(xml::content::simple);
1685         const auto prop = detail::from_string<core_property>(property_element.name());
1686         if (prop == core_property::created || prop == core_property::modified)
1687         {
1688             skip_attribute(qn("xsi", "type"));
1689         }
1690         target_.core_property(prop, read_text());
1691         expect_end_element(property_element);
1692     }
1693 
1694     expect_end_element(qn("core-properties", "coreProperties"));
1695 }
1696 
read_extended_properties()1697 void xlsx_consumer::read_extended_properties()
1698 {
1699     expect_start_element(qn("extended-properties", "Properties"), xml::content::complex);
1700 
1701     while (in_element(qn("extended-properties", "Properties")))
1702     {
1703         const auto property_element = expect_start_element(xml::content::mixed);
1704         const auto prop = detail::from_string<extended_property>(property_element.name());
1705         target_.extended_property(prop, read_variant());
1706         expect_end_element(property_element);
1707     }
1708 
1709     expect_end_element(qn("extended-properties", "Properties"));
1710 }
1711 
read_custom_properties()1712 void xlsx_consumer::read_custom_properties()
1713 {
1714     expect_start_element(qn("custom-properties", "Properties"), xml::content::complex);
1715 
1716     while (in_element(qn("custom-properties", "Properties")))
1717     {
1718         const auto property_element = expect_start_element(xml::content::complex);
1719         const auto prop = parser().attribute("name");
1720         const auto format_id = parser().attribute("fmtid");
1721         const auto property_id = parser().attribute("pid");
1722         target_.custom_property(prop, read_variant());
1723         expect_end_element(property_element);
1724     }
1725 
1726     expect_end_element(qn("custom-properties", "Properties"));
1727 }
1728 
read_office_document(const std::string & content_type)1729 void xlsx_consumer::read_office_document(const std::string &content_type) // CT_Workbook
1730 {
1731     if (content_type !=
1732             "application/vnd."
1733             "openxmlformats-officedocument.spreadsheetml.sheet.main+xml"
1734         && content_type !=
1735             "application/vnd."
1736             "openxmlformats-officedocument.spreadsheetml.template.main+xml")
1737     {
1738         throw xlnt::invalid_file(content_type);
1739     }
1740 
1741     target_.d_->calculation_properties_.clear();
1742 
1743     expect_start_element(qn("workbook", "workbook"), xml::content::complex);
1744     skip_attribute(qn("mc", "Ignorable"));
1745 
1746     while (in_element(qn("workbook", "workbook")))
1747     {
1748         auto current_workbook_element = expect_start_element(xml::content::complex);
1749 
1750         if (current_workbook_element == qn("workbook", "fileVersion")) // CT_FileVersion 0-1
1751         {
1752             detail::workbook_impl::file_version_t file_version;
1753 
1754             if (parser().attribute_present("appName"))
1755             {
1756                 file_version.app_name = parser().attribute("appName");
1757             }
1758 
1759             if (parser().attribute_present("lastEdited"))
1760             {
1761                 file_version.last_edited = parser().attribute<std::size_t>("lastEdited");
1762             }
1763 
1764             if (parser().attribute_present("lowestEdited"))
1765             {
1766                 file_version.lowest_edited = parser().attribute<std::size_t>("lowestEdited");
1767             }
1768 
1769             if (parser().attribute_present("lowestEdited"))
1770             {
1771                 file_version.rup_build = parser().attribute<std::size_t>("rupBuild");
1772             }
1773 
1774             skip_attribute("codeName");
1775 
1776             target_.d_->file_version_ = file_version;
1777         }
1778         else if (current_workbook_element == qn("workbook", "fileSharing")) // CT_FileSharing 0-1
1779         {
1780             skip_remaining_content(current_workbook_element);
1781         }
1782         else if (current_workbook_element == qn("mc", "AlternateContent"))
1783         {
1784             while (in_element(qn("mc", "AlternateContent")))
1785             {
1786                 auto alternate_content_element = expect_start_element(xml::content::complex);
1787 
1788                 if (alternate_content_element == qn("mc", "Choice")
1789                     && parser().attribute_present("Requires")
1790                     && parser().attribute("Requires") == "x15")
1791                 {
1792                     auto x15_element = expect_start_element(xml::content::simple);
1793 
1794                     if (x15_element == qn("x15ac", "absPath"))
1795                     {
1796                         target_.d_->abs_path_ = parser().attribute("url");
1797                     }
1798 
1799                     skip_remaining_content(x15_element);
1800                     expect_end_element(x15_element);
1801                 }
1802 
1803                 skip_remaining_content(alternate_content_element);
1804                 expect_end_element(alternate_content_element);
1805             }
1806         }
1807         else if (current_workbook_element == qn("workbook", "workbookPr")) // CT_WorkbookPr 0-1
1808         {
1809             target_.base_date(parser().attribute_present("date1904") // optional, bool=false
1810                         && is_true(parser().attribute("date1904"))
1811                     ? calendar::mac_1904
1812                     : calendar::windows_1900);
1813             skip_attribute("showObjects"); // optional, ST_Objects="all"
1814             skip_attribute("showBorderUnselectedTables"); // optional, bool=true
1815             skip_attribute("filterPrivacy"); // optional, bool=false
1816             skip_attribute("promptedSolutions"); // optional, bool=false
1817             skip_attribute("showInkAnnotation"); // optional, bool=true
1818             skip_attribute("backupFile"); // optional, bool=false
1819             skip_attribute("saveExternalLinkValues"); // optional, bool=true
1820             skip_attribute("updateLinks"); // optional, ST_UpdateLinks="userSet"
1821             skip_attribute("codeName"); // optional, string
1822             skip_attribute("hidePivotFieldList"); // optional, bool=false
1823             skip_attribute("showPivotChartFilter"); // optional, bool=false
1824             skip_attribute("allowRefreshQuery"); // optional, bool=false
1825             skip_attribute("publishItems"); // optional, bool=false
1826             skip_attribute("checkCompatibility"); // optional, bool=false
1827             skip_attribute("autoCompressPictures"); // optional, bool=true
1828             skip_attribute("refreshAllConnections"); // optional, bool=false
1829             skip_attribute("defaultThemeVersion"); // optional, uint
1830             skip_attribute("dateCompatibility"); // optional, bool (undocumented)
1831         }
1832         else if (current_workbook_element == qn("workbook", "workbookProtection")) // CT_WorkbookProtection 0-1
1833         {
1834             skip_remaining_content(current_workbook_element);
1835         }
1836         else if (current_workbook_element == qn("workbook", "bookViews")) // CT_BookViews 0-1
1837         {
1838             while (in_element(qn("workbook", "bookViews")))
1839             {
1840                 expect_start_element(qn("workbook", "workbookView"), xml::content::simple);
1841                 skip_attributes({"firstSheet", "showHorizontalScroll",
1842                     "showSheetTabs", "showVerticalScroll"});
1843 
1844                 workbook_view view;
1845 
1846                 if (parser().attribute_present("xWindow"))
1847                 {
1848                     view.x_window = parser().attribute<int>("xWindow");
1849                 }
1850 
1851                 if (parser().attribute_present("yWindow"))
1852                 {
1853                     view.y_window = parser().attribute<int>("yWindow");
1854                 }
1855 
1856                 if (parser().attribute_present("windowWidth"))
1857                 {
1858                     view.window_width = parser().attribute<std::size_t>("windowWidth");
1859                 }
1860 
1861                 if (parser().attribute_present("windowHeight"))
1862                 {
1863                     view.window_height = parser().attribute<std::size_t>("windowHeight");
1864                 }
1865 
1866                 if (parser().attribute_present("tabRatio"))
1867                 {
1868                     view.tab_ratio = parser().attribute<std::size_t>("tabRatio");
1869                 }
1870 
1871                 if (parser().attribute_present("activeTab"))
1872                 {
1873                     view.active_tab = parser().attribute<std::size_t>("activeTab");
1874                 }
1875 
1876                 target_.view(view);
1877 
1878                 skip_attributes();
1879                 expect_end_element(qn("workbook", "workbookView"));
1880             }
1881         }
1882         else if (current_workbook_element == qn("workbook", "sheets")) // CT_Sheets 1
1883         {
1884             std::size_t index = 0;
1885 
1886             while (in_element(qn("workbook", "sheets")))
1887             {
1888                 expect_start_element(qn("spreadsheetml", "sheet"), xml::content::simple);
1889 
1890                 auto title = parser().attribute("name");
1891 
1892                 sheet_title_index_map_[title] = index++;
1893                 sheet_title_id_map_[title] = parser().attribute<std::size_t>("sheetId");
1894                 target_.d_->sheet_title_rel_id_map_[title] = parser().attribute(qn("r", "id"));
1895 
1896                 bool hidden = parser().attribute<std::string>("state", "") == "hidden";
1897                 target_.d_->sheet_hidden_.push_back(hidden);
1898 
1899                 expect_end_element(qn("spreadsheetml", "sheet"));
1900             }
1901         }
1902         else if (current_workbook_element == qn("workbook", "functionGroups")) // CT_FunctionGroups 0-1
1903         {
1904             skip_remaining_content(current_workbook_element);
1905         }
1906         else if (current_workbook_element == qn("workbook", "externalReferences")) // CT_ExternalReferences 0-1
1907         {
1908             skip_remaining_content(current_workbook_element);
1909         }
1910         else if (current_workbook_element == qn("workbook", "definedNames")) // CT_DefinedNames 0-1
1911         {
1912             skip_remaining_content(current_workbook_element);
1913         }
1914         else if (current_workbook_element == qn("workbook", "calcPr")) // CT_CalcPr 0-1
1915         {
1916             xlnt::calculation_properties calc_props;
1917             if (parser().attribute_present("calcId"))
1918             {
1919                 calc_props.calc_id = parser().attribute<std::size_t>("calcId");
1920             }
1921             if (parser().attribute_present("concurrentCalc"))
1922             {
1923                 calc_props.concurrent_calc = is_true(parser().attribute("concurrentCalc"));
1924             }
1925             target_.calculation_properties(calc_props);
1926             parser().attribute_map(); // skip remaining
1927         }
1928         else if (current_workbook_element == qn("workbook", "oleSize")) // CT_OleSize 0-1
1929         {
1930             skip_remaining_content(current_workbook_element);
1931         }
1932         else if (current_workbook_element == qn("workbook", "customWorkbookViews")) // CT_CustomWorkbookViews 0-1
1933         {
1934             skip_remaining_content(current_workbook_element);
1935         }
1936         else if (current_workbook_element == qn("workbook", "pivotCaches")) // CT_PivotCaches 0-1
1937         {
1938             skip_remaining_content(current_workbook_element);
1939         }
1940         else if (current_workbook_element == qn("workbook", "smartTagPr")) // CT_SmartTagPr 0-1
1941         {
1942             skip_remaining_content(current_workbook_element);
1943         }
1944         else if (current_workbook_element == qn("workbook", "smartTagTypes")) // CT_SmartTagTypes 0-1
1945         {
1946             skip_remaining_content(current_workbook_element);
1947         }
1948         else if (current_workbook_element == qn("workbook", "webPublishing")) // CT_WebPublishing 0-1
1949         {
1950             skip_remaining_content(current_workbook_element);
1951         }
1952         else if (current_workbook_element == qn("workbook", "fileRecoveryPr")) // CT_FileRecoveryPr 0+
1953         {
1954             skip_remaining_content(current_workbook_element);
1955         }
1956         else if (current_workbook_element == qn("workbook", "webPublishObjects")) // CT_WebPublishObjects 0-1
1957         {
1958             skip_remaining_content(current_workbook_element);
1959         }
1960         else if (current_workbook_element == qn("workbook", "extLst")) // CT_ExtensionList 0-1
1961         {
1962             while (in_element(qn("workbook", "extLst")))
1963             {
1964                 auto extension_element = expect_start_element(xml::content::complex);
1965 
1966                 if (extension_element == qn("workbook", "ext")
1967                     && parser().attribute_present("uri")
1968                     && parser().attribute("uri") == "{7523E5D3-25F3-A5E0-1632-64F254C22452}")
1969                 {
1970                     auto arch_id_extension_element = expect_start_element(xml::content::simple);
1971 
1972                     if (arch_id_extension_element == qn("mx", "ArchID"))
1973                     {
1974                         target_.d_->arch_id_flags_ = parser().attribute<std::size_t>("Flags");
1975                     }
1976 
1977                     skip_remaining_content(arch_id_extension_element);
1978                     expect_end_element(arch_id_extension_element);
1979                 }
1980 
1981                 skip_remaining_content(extension_element);
1982                 expect_end_element(extension_element);
1983             }
1984         }
1985         else
1986         {
1987             unexpected_element(current_workbook_element);
1988         }
1989 
1990         expect_end_element(current_workbook_element);
1991     }
1992 
1993     expect_end_element(qn("workbook", "workbook"));
1994 
1995     auto workbook_rel = manifest().relationship(path("/"), relationship_type::office_document);
1996     auto workbook_path = workbook_rel.target().path();
1997 
1998     if (manifest().has_relationship(workbook_path, relationship_type::shared_string_table))
1999     {
2000         read_part({workbook_rel,
2001             manifest().relationship(workbook_path,
2002                 relationship_type::shared_string_table)});
2003     }
2004 
2005     if (manifest().has_relationship(workbook_path, relationship_type::stylesheet))
2006     {
2007         read_part({workbook_rel,
2008             manifest().relationship(workbook_path,
2009                 relationship_type::stylesheet)});
2010     }
2011 
2012     if (manifest().has_relationship(workbook_path, relationship_type::theme))
2013     {
2014         read_part({workbook_rel,
2015             manifest().relationship(workbook_path,
2016                 relationship_type::theme)});
2017     }
2018 
2019     for (auto worksheet_rel : manifest().relationships(workbook_path, relationship_type::worksheet))
2020     {
2021         auto title = std::find_if(target_.d_->sheet_title_rel_id_map_.begin(),
2022             target_.d_->sheet_title_rel_id_map_.end(),
2023             [&](const std::pair<std::string, std::string> &p) {
2024                 return p.second == worksheet_rel.id();
2025             })
2026                          ->first;
2027 
2028         auto id = sheet_title_id_map_[title];
2029         auto index = sheet_title_index_map_[title];
2030 
2031         auto insertion_iter = target_.d_->worksheets_.begin();
2032         while (insertion_iter != target_.d_->worksheets_.end() && sheet_title_index_map_[insertion_iter->title_] < index)
2033         {
2034             ++insertion_iter;
2035         }
2036 
2037         current_worksheet_ = &*target_.d_->worksheets_.emplace(insertion_iter, &target_, id, title);
2038 
2039         if (!streaming_)
2040         {
2041             read_part({workbook_rel, worksheet_rel});
2042         }
2043     }
2044 }
2045 
// Read Workbook Relationship Target Parts
2047 
read_calculation_chain()2048 void xlsx_consumer::read_calculation_chain()
2049 {
2050 }
2051 
read_chartsheet(const std::string &)2052 void xlsx_consumer::read_chartsheet(const std::string & /*title*/)
2053 {
2054 }
2055 
read_connections()2056 void xlsx_consumer::read_connections()
2057 {
2058 }
2059 
read_custom_property()2060 void xlsx_consumer::read_custom_property()
2061 {
2062 }
2063 
read_custom_xml_mappings()2064 void xlsx_consumer::read_custom_xml_mappings()
2065 {
2066 }
2067 
read_dialogsheet(const std::string &)2068 void xlsx_consumer::read_dialogsheet(const std::string & /*title*/)
2069 {
2070 }
2071 
read_external_workbook_references()2072 void xlsx_consumer::read_external_workbook_references()
2073 {
2074 }
2075 
read_pivot_table()2076 void xlsx_consumer::read_pivot_table()
2077 {
2078 }
2079 
read_shared_string_table()2080 void xlsx_consumer::read_shared_string_table()
2081 {
2082     expect_start_element(qn("spreadsheetml", "sst"), xml::content::complex);
2083     skip_attributes({"count"});
2084 
2085     bool has_unique_count = false;
2086     std::size_t unique_count = 0;
2087 
2088     if (parser().attribute_present("uniqueCount"))
2089     {
2090         has_unique_count = true;
2091         unique_count = parser().attribute<std::size_t>("uniqueCount");
2092     }
2093 
2094     while (in_element(qn("spreadsheetml", "sst")))
2095     {
2096         expect_start_element(qn("spreadsheetml", "si"), xml::content::complex);
2097         auto rt = read_rich_text(qn("spreadsheetml", "si"));
2098         target_.add_shared_string(rt, true);
2099         expect_end_element(qn("spreadsheetml", "si"));
2100     }
2101 
2102     expect_end_element(qn("spreadsheetml", "sst"));
2103 
2104     if (has_unique_count && unique_count != target_.shared_strings().size())
2105     {
2106         throw invalid_file("sizes don't match");
2107     }
2108 }
2109 
read_shared_workbook_revision_headers()2110 void xlsx_consumer::read_shared_workbook_revision_headers()
2111 {
2112 }
2113 
read_shared_workbook()2114 void xlsx_consumer::read_shared_workbook()
2115 {
2116 }
2117 
read_shared_workbook_user_data()2118 void xlsx_consumer::read_shared_workbook_user_data()
2119 {
2120 }
2121 
read_stylesheet()2122 void xlsx_consumer::read_stylesheet()
2123 {
2124     target_.impl().stylesheet_ = detail::stylesheet();
2125     auto &stylesheet = target_.impl().stylesheet_.get();
2126 
2127     expect_start_element(qn("spreadsheetml", "styleSheet"), xml::content::complex);
2128     skip_attributes({qn("mc", "Ignorable")});
2129 
2130     std::vector<std::pair<style_impl, std::size_t>> styles;
2131     std::vector<std::pair<format_impl, std::size_t>> format_records;
2132     std::vector<std::pair<format_impl, std::size_t>> style_records;
2133 
2134     while (in_element(qn("spreadsheetml", "styleSheet")))
2135     {
2136         auto current_style_element = expect_start_element(xml::content::complex);
2137 
2138         if (current_style_element == qn("spreadsheetml", "borders"))
2139         {
2140             auto &borders = stylesheet.borders;
2141             auto count = parser().attribute<std::size_t>("count");
2142 
2143             while (in_element(qn("spreadsheetml", "borders")))
2144             {
2145                 borders.push_back(xlnt::border());
2146                 auto &border = borders.back();
2147 
2148                 expect_start_element(qn("spreadsheetml", "border"), xml::content::complex);
2149 
2150                 auto diagonal = diagonal_direction::neither;
2151 
2152                 if (parser().attribute_present("diagonalDown") && parser().attribute("diagonalDown") == "1")
2153                 {
2154                     diagonal = diagonal_direction::down;
2155                 }
2156 
2157                 if (parser().attribute_present("diagonalUp") && parser().attribute("diagonalUp") == "1")
2158                 {
2159                     diagonal = diagonal == diagonal_direction::down ? diagonal_direction::both : diagonal_direction::up;
2160                 }
2161 
2162                 if (diagonal != diagonal_direction::neither)
2163                 {
2164                     border.diagonal(diagonal);
2165                 }
2166 
2167                 while (in_element(qn("spreadsheetml", "border")))
2168                 {
2169                     auto current_side_element = expect_start_element(xml::content::complex);
2170 
2171                     xlnt::border::border_property side;
2172 
2173                     if (parser().attribute_present("style"))
2174                     {
2175                         side.style(parser().attribute<xlnt::border_style>("style"));
2176                     }
2177 
2178                     if (in_element(current_side_element))
2179                     {
2180                         expect_start_element(qn("spreadsheetml", "color"), xml::content::complex);
2181                         side.color(read_color());
2182                         expect_end_element(qn("spreadsheetml", "color"));
2183                     }
2184 
2185                     expect_end_element(current_side_element);
2186 
2187                     auto side_type = xml::value_traits<xlnt::border_side>::parse(current_side_element.name(), parser());
2188                     border.side(side_type, side);
2189                 }
2190 
2191                 expect_end_element(qn("spreadsheetml", "border"));
2192             }
2193 
2194             if (count != borders.size())
2195             {
2196                 throw xlnt::exception("border counts don't match");
2197             }
2198         }
2199         else if (current_style_element == qn("spreadsheetml", "fills"))
2200         {
2201             auto &fills = stylesheet.fills;
2202             auto count = parser().attribute<std::size_t>("count");
2203 
2204             while (in_element(qn("spreadsheetml", "fills")))
2205             {
2206                 fills.push_back(xlnt::fill());
2207                 auto &new_fill = fills.back();
2208 
2209                 expect_start_element(qn("spreadsheetml", "fill"), xml::content::complex);
2210                 auto fill_element = expect_start_element(xml::content::complex);
2211 
2212                 if (fill_element == qn("spreadsheetml", "patternFill"))
2213                 {
2214                     xlnt::pattern_fill pattern;
2215 
2216                     if (parser().attribute_present("patternType"))
2217                     {
2218                         pattern.type(parser().attribute<xlnt::pattern_fill_type>("patternType"));
2219 
2220                         while (in_element(qn("spreadsheetml", "patternFill")))
2221                         {
2222                             auto pattern_type_element = expect_start_element(xml::content::complex);
2223 
2224                             if (pattern_type_element == qn("spreadsheetml", "fgColor"))
2225                             {
2226                                 pattern.foreground(read_color());
2227                             }
2228                             else if (pattern_type_element == qn("spreadsheetml", "bgColor"))
2229                             {
2230                                 pattern.background(read_color());
2231                             }
2232                             else
2233                             {
2234                                 unexpected_element(pattern_type_element);
2235                             }
2236 
2237                             expect_end_element(pattern_type_element);
2238                         }
2239                     }
2240 
2241                     new_fill = pattern;
2242                 }
2243                 else if (fill_element == qn("spreadsheetml", "gradientFill"))
2244                 {
2245                     xlnt::gradient_fill gradient;
2246 
2247                     if (parser().attribute_present("type"))
2248                     {
2249                         gradient.type(parser().attribute<xlnt::gradient_fill_type>("type"));
2250                     }
2251                     else
2252                     {
2253                         gradient.type(xlnt::gradient_fill_type::linear);
2254                     }
2255 
2256                     while (in_element(qn("spreadsheetml", "gradientFill")))
2257                     {
2258                         expect_start_element(qn("spreadsheetml", "stop"), xml::content::complex);
2259                         auto position = converter_.deserialise(parser().attribute("position"));
2260                         expect_start_element(qn("spreadsheetml", "color"), xml::content::complex);
2261                         auto color = read_color();
2262                         expect_end_element(qn("spreadsheetml", "color"));
2263                         expect_end_element(qn("spreadsheetml", "stop"));
2264 
2265                         gradient.add_stop(position, color);
2266                     }
2267 
2268                     new_fill = gradient;
2269                 }
2270                 else
2271                 {
2272                     unexpected_element(fill_element);
2273                 }
2274 
2275                 expect_end_element(fill_element);
2276                 expect_end_element(qn("spreadsheetml", "fill"));
2277             }
2278 
2279             if (count != fills.size())
2280             {
2281                 throw xlnt::exception("counts don't match");
2282             }
2283         }
2284         else if (current_style_element == qn("spreadsheetml", "fonts"))
2285         {
2286             auto &fonts = stylesheet.fonts;
2287             auto count = parser().attribute<std::size_t>("count", 0);
2288 
2289             if (parser().attribute_present(qn("x14ac", "knownFonts")))
2290             {
2291                 target_.enable_known_fonts();
2292             }
2293 
2294             while (in_element(qn("spreadsheetml", "fonts")))
2295             {
2296                 fonts.push_back(xlnt::font());
2297                 auto &new_font = stylesheet.fonts.back();
2298 
2299                 expect_start_element(qn("spreadsheetml", "font"), xml::content::complex);
2300 
2301                 while (in_element(qn("spreadsheetml", "font")))
2302                 {
2303                     auto font_property_element = expect_start_element(xml::content::simple);
2304 
2305                     if (font_property_element == qn("spreadsheetml", "sz"))
2306                     {
2307                         new_font.size(converter_.deserialise(parser().attribute("val")));
2308                     }
2309                     else if (font_property_element == qn("spreadsheetml", "name"))
2310                     {
2311                         new_font.name(parser().attribute("val"));
2312                     }
2313                     else if (font_property_element == qn("spreadsheetml", "color"))
2314                     {
2315                         new_font.color(read_color());
2316                     }
2317                     else if (font_property_element == qn("spreadsheetml", "family"))
2318                     {
2319                         new_font.family(parser().attribute<std::size_t>("val"));
2320                     }
2321                     else if (font_property_element == qn("spreadsheetml", "scheme"))
2322                     {
2323                         new_font.scheme(parser().attribute("val"));
2324                     }
2325                     else if (font_property_element == qn("spreadsheetml", "b"))
2326                     {
2327                         if (parser().attribute_present("val"))
2328                         {
2329                             new_font.bold(is_true(parser().attribute("val")));
2330                         }
2331                         else
2332                         {
2333                             new_font.bold(true);
2334                         }
2335                     }
2336                     else if (font_property_element == qn("spreadsheetml", "vertAlign"))
2337                     {
2338                         auto vert_align = parser().attribute("val");
2339 
2340                         if (vert_align == "superscript")
2341                         {
2342                             new_font.superscript(true);
2343                         }
2344                         else if (vert_align == "subscript")
2345                         {
2346                             new_font.subscript(true);
2347                         }
2348                     }
2349                     else if (font_property_element == qn("spreadsheetml", "strike"))
2350                     {
2351                         if (parser().attribute_present("val"))
2352                         {
2353                             new_font.strikethrough(is_true(parser().attribute("val")));
2354                         }
2355                         else
2356                         {
2357                             new_font.strikethrough(true);
2358                         }
2359                     }
2360                     else if (font_property_element == qn("spreadsheetml", "outline"))
2361                     {
2362                         if (parser().attribute_present("val"))
2363                         {
2364                             new_font.outline(is_true(parser().attribute("val")));
2365                         }
2366                         else
2367                         {
2368                             new_font.outline(true);
2369                         }
2370                     }
2371                     else if (font_property_element == qn("spreadsheetml", "shadow"))
2372                     {
2373                         if (parser().attribute_present("val"))
2374                         {
2375                             new_font.shadow(is_true(parser().attribute("val")));
2376                         }
2377                         else
2378                         {
2379                             new_font.shadow(true);
2380                         }
2381                     }
2382                     else if (font_property_element == qn("spreadsheetml", "i"))
2383                     {
2384                         if (parser().attribute_present("val"))
2385                         {
2386                             new_font.italic(is_true(parser().attribute("val")));
2387                         }
2388                         else
2389                         {
2390                             new_font.italic(true);
2391                         }
2392                     }
2393                     else if (font_property_element == qn("spreadsheetml", "u"))
2394                     {
2395                         if (parser().attribute_present("val"))
2396                         {
2397                             new_font.underline(parser().attribute<xlnt::font::underline_style>("val"));
2398                         }
2399                         else
2400                         {
2401                             new_font.underline(xlnt::font::underline_style::single);
2402                         }
2403                     }
2404                     else if (font_property_element == qn("spreadsheetml", "charset"))
2405                     {
2406                         if (parser().attribute_present("val"))
2407                         {
2408                             parser().attribute("val");
2409                         }
2410                     }
2411                     else
2412                     {
2413                         unexpected_element(font_property_element);
2414                     }
2415 
2416                     expect_end_element(font_property_element);
2417                 }
2418 
2419                 expect_end_element(qn("spreadsheetml", "font"));
2420             }
2421 
2422             if (count != stylesheet.fonts.size())
2423             {
2424                 // throw xlnt::exception("counts don't match");
2425             }
2426         }
2427         else if (current_style_element == qn("spreadsheetml", "numFmts"))
2428         {
2429             auto &number_formats = stylesheet.number_formats;
2430             auto count = parser().attribute<std::size_t>("count");
2431 
2432             while (in_element(qn("spreadsheetml", "numFmts")))
2433             {
2434                 expect_start_element(qn("spreadsheetml", "numFmt"), xml::content::simple);
2435 
2436                 auto format_string = parser().attribute("formatCode");
2437 
2438                 if (format_string == "GENERAL")
2439                 {
2440                     format_string = "General";
2441                 }
2442 
2443                 xlnt::number_format nf;
2444 
2445                 nf.format_string(format_string);
2446                 nf.id(parser().attribute<std::size_t>("numFmtId"));
2447 
2448                 expect_end_element(qn("spreadsheetml", "numFmt"));
2449 
2450                 number_formats.push_back(nf);
2451             }
2452 
2453             if (count != number_formats.size())
2454             {
2455                 throw xlnt::exception("counts don't match");
2456             }
2457         }
2458         else if (current_style_element == qn("spreadsheetml", "cellStyles"))
2459         {
2460             auto count = parser().attribute<std::size_t>("count");
2461 
2462             while (in_element(qn("spreadsheetml", "cellStyles")))
2463             {
2464                 auto &data = *styles.emplace(styles.end());
2465 
2466                 expect_start_element(qn("spreadsheetml", "cellStyle"), xml::content::simple);
2467 
2468                 data.first.name = parser().attribute("name");
2469                 data.second = parser().attribute<std::size_t>("xfId");
2470 
2471                 if (parser().attribute_present("builtinId"))
2472                 {
2473                     data.first.builtin_id = parser().attribute<std::size_t>("builtinId");
2474                 }
2475 
2476                 if (parser().attribute_present("hidden"))
2477                 {
2478                     data.first.hidden_style = is_true(parser().attribute("hidden"));
2479                 }
2480 
2481                 if (parser().attribute_present("customBuiltin"))
2482                 {
2483                     data.first.custom_builtin = is_true(parser().attribute("customBuiltin"));
2484                 }
2485 
2486                 expect_end_element(qn("spreadsheetml", "cellStyle"));
2487             }
2488 
2489             if (count != styles.size())
2490             {
2491                 throw xlnt::exception("counts don't match");
2492             }
2493         }
2494         else if (current_style_element == qn("spreadsheetml", "cellStyleXfs")
2495             || current_style_element == qn("spreadsheetml", "cellXfs"))
2496         {
2497             auto in_style_records = current_style_element.name() == "cellStyleXfs";
2498             auto count = parser().attribute<std::size_t>("count");
2499 
2500             while (in_element(current_style_element))
2501             {
2502                 expect_start_element(qn("spreadsheetml", "xf"), xml::content::complex);
2503 
2504                 auto &record = *(!in_style_records
2505                         ? format_records.emplace(format_records.end())
2506                         : style_records.emplace(style_records.end()));
2507 
2508                 if (parser().attribute_present("applyBorder"))
2509                 {
2510                     record.first.border_applied = is_true(parser().attribute("applyBorder"));
2511                 }
2512                 record.first.border_id = parser().attribute_present("borderId")
2513                     ? parser().attribute<std::size_t>("borderId")
2514                     : optional<std::size_t>();
2515 
2516                 if (parser().attribute_present("applyFill"))
2517                 {
2518                     record.first.fill_applied = is_true(parser().attribute("applyFill"));
2519                 }
2520                 record.first.fill_id = parser().attribute_present("fillId")
2521                     ? parser().attribute<std::size_t>("fillId")
2522                     : optional<std::size_t>();
2523 
2524                 if (parser().attribute_present("applyFont"))
2525                 {
2526                     record.first.font_applied = is_true(parser().attribute("applyFont"));
2527                 }
2528                 record.first.font_id = parser().attribute_present("fontId")
2529                     ? parser().attribute<std::size_t>("fontId")
2530                     : optional<std::size_t>();
2531 
2532                 if (parser().attribute_present("applyNumberFormat"))
2533                 {
2534                     record.first.number_format_applied = is_true(parser().attribute("applyNumberFormat"));
2535                 }
2536                 record.first.number_format_id = parser().attribute_present("numFmtId")
2537                     ? parser().attribute<std::size_t>("numFmtId")
2538                     : optional<std::size_t>();
2539 
2540                 auto apply_alignment_present = parser().attribute_present("applyAlignment");
2541                 if (apply_alignment_present)
2542                 {
2543                     record.first.alignment_applied = is_true(parser().attribute("applyAlignment"));
2544                 }
2545 
2546                 auto apply_protection_present = parser().attribute_present("applyProtection");
2547                 if (apply_protection_present)
2548                 {
2549                     record.first.protection_applied = is_true(parser().attribute("applyProtection"));
2550                 }
2551 
2552                 record.first.pivot_button_ = parser().attribute_present("pivotButton")
2553                     && is_true(parser().attribute("pivotButton"));
2554                 record.first.quote_prefix_ = parser().attribute_present("quotePrefix")
2555                     && is_true(parser().attribute("quotePrefix"));
2556 
2557                 if (parser().attribute_present("xfId"))
2558                 {
2559                     record.second = parser().attribute<std::size_t>("xfId");
2560                 }
2561 
2562                 while (in_element(qn("spreadsheetml", "xf")))
2563                 {
2564                     auto xf_child_element = expect_start_element(xml::content::simple);
2565 
2566                     if (xf_child_element == qn("spreadsheetml", "alignment"))
2567                     {
2568                         record.first.alignment_id = stylesheet.alignments.size();
2569                         auto &alignment = *stylesheet.alignments.emplace(stylesheet.alignments.end());
2570 
2571                         if (parser().attribute_present("wrapText"))
2572                         {
2573                             alignment.wrap(is_true(parser().attribute("wrapText")));
2574                         }
2575 
2576                         if (parser().attribute_present("shrinkToFit"))
2577                         {
2578                             alignment.shrink(is_true(parser().attribute("shrinkToFit")));
2579                         }
2580 
2581                         if (parser().attribute_present("indent"))
2582                         {
2583                             alignment.indent(parser().attribute<int>("indent"));
2584                         }
2585 
2586                         if (parser().attribute_present("textRotation"))
2587                         {
2588                             alignment.rotation(parser().attribute<int>("textRotation"));
2589                         }
2590 
2591                         if (parser().attribute_present("vertical"))
2592                         {
2593                             alignment.vertical(parser().attribute<xlnt::vertical_alignment>("vertical"));
2594                         }
2595 
2596                         if (parser().attribute_present("horizontal"))
2597                         {
2598                             alignment.horizontal(parser().attribute<xlnt::horizontal_alignment>("horizontal"));
2599                         }
2600 
2601                         if (parser().attribute_present("readingOrder"))
2602                         {
2603                             parser().attribute<int>("readingOrder");
2604                         }
2605                     }
2606                     else if (xf_child_element == qn("spreadsheetml", "protection"))
2607                     {
2608                         record.first.protection_id = stylesheet.protections.size();
2609                         auto &protection = *stylesheet.protections.emplace(stylesheet.protections.end());
2610 
2611                         protection.locked(parser().attribute_present("locked")
2612                             && is_true(parser().attribute("locked")));
2613                         protection.hidden(parser().attribute_present("hidden")
2614                             && is_true(parser().attribute("hidden")));
2615                     }
2616                     else
2617                     {
2618                         unexpected_element(xf_child_element);
2619                     }
2620 
2621                     expect_end_element(xf_child_element);
2622                 }
2623 
2624                 expect_end_element(qn("spreadsheetml", "xf"));
2625             }
2626 
2627             if ((in_style_records && count != style_records.size())
2628                 || (!in_style_records && count != format_records.size()))
2629             {
2630                 throw xlnt::exception("counts don't match");
2631             }
2632         }
2633         else if (current_style_element == qn("spreadsheetml", "dxfs"))
2634         {
2635             auto count = parser().attribute<std::size_t>("count");
2636             std::size_t processed = 0;
2637 
2638             while (in_element(current_style_element))
2639             {
2640                 auto current_element = expect_start_element(xml::content::mixed);
2641                 skip_remaining_content(current_element);
2642                 expect_end_element(current_element);
2643                 ++processed;
2644             }
2645 
2646             if (count != processed)
2647             {
2648                 throw xlnt::exception("counts don't match");
2649             }
2650         }
2651         else if (current_style_element == qn("spreadsheetml", "tableStyles"))
2652         {
2653             skip_attribute("defaultTableStyle");
2654             skip_attribute("defaultPivotStyle");
2655 
2656             auto count = parser().attribute<std::size_t>("count");
2657             std::size_t processed = 0;
2658 
2659             while (in_element(qn("spreadsheetml", "tableStyles")))
2660             {
2661                 auto current_element = expect_start_element(xml::content::complex);
2662                 skip_remaining_content(current_element);
2663                 expect_end_element(current_element);
2664                 ++processed;
2665             }
2666 
2667             if (count != processed)
2668             {
2669                 throw xlnt::exception("counts don't match");
2670             }
2671         }
2672         else if (current_style_element == qn("spreadsheetml", "extLst"))
2673         {
2674             while (in_element(qn("spreadsheetml", "extLst")))
2675             {
2676                 expect_start_element(qn("spreadsheetml", "ext"), xml::content::complex);
2677 
2678                 const auto uri = parser().attribute("uri");
2679 
2680                 if (uri == "{EB79DEF2-80B8-43e5-95BD-54CBDDF9020C}") // slicerStyles
2681                 {
2682                     expect_start_element(qn("x14", "slicerStyles"), xml::content::simple);
2683                     stylesheet.default_slicer_style = parser().attribute("defaultSlicerStyle");
2684                     expect_end_element(qn("x14", "slicerStyles"));
2685                 }
2686                 else
2687                 {
2688                     skip_remaining_content(qn("spreadsheetml", "ext"));
2689                 }
2690 
2691                 expect_end_element(qn("spreadsheetml", "ext"));
2692             }
2693         }
2694         else if (current_style_element == qn("spreadsheetml", "colors")) // CT_Colors 0-1
2695         {
2696             while (in_element(qn("spreadsheetml", "colors")))
2697             {
2698                 auto colors_child_element = expect_start_element(xml::content::complex);
2699 
2700                 if (colors_child_element == qn("spreadsheetml", "indexedColors")) // CT_IndexedColors 0-1
2701                 {
2702                     while (in_element(colors_child_element))
2703                     {
2704                         expect_start_element(qn("spreadsheetml", "rgbColor"), xml::content::simple);
2705                         stylesheet.colors.push_back(read_color());
2706                         expect_end_element(qn("spreadsheetml", "rgbColor"));
2707                     }
2708                 }
2709                 else if (colors_child_element == qn("spreadsheetml", "mruColors")) // CT_MRUColors
2710                 {
2711                     skip_remaining_content(colors_child_element);
2712                 }
2713                 else
2714                 {
2715                     unexpected_element(colors_child_element);
2716                 }
2717 
2718                 expect_end_element(colors_child_element);
2719             }
2720         }
2721         else
2722         {
2723             unexpected_element(current_style_element);
2724         }
2725 
2726         expect_end_element(current_style_element);
2727     }
2728 
2729     expect_end_element(qn("spreadsheetml", "styleSheet"));
2730 
2731     std::size_t xf_id = 0;
2732 
2733     for (const auto &record : style_records)
2734     {
2735         auto style_iter = std::find_if(styles.begin(), styles.end(),
2736             [&xf_id](const std::pair<style_impl, std::size_t> &s) { return s.second == xf_id; });
2737         ++xf_id;
2738 
2739         if (style_iter == styles.end()) continue;
2740 
2741         auto new_style = stylesheet.create_style(style_iter->first.name);
2742 
2743         new_style.d_->pivot_button_ = style_iter->first.pivot_button_;
2744         new_style.d_->quote_prefix_ = style_iter->first.quote_prefix_;
2745         new_style.d_->formatting_record_id = style_iter->first.formatting_record_id;
2746         new_style.d_->hidden_style = style_iter->first.hidden_style;
2747         new_style.d_->custom_builtin = style_iter->first.custom_builtin;
2748         new_style.d_->hidden_style = style_iter->first.hidden_style;
2749         new_style.d_->builtin_id = style_iter->first.builtin_id;
2750         new_style.d_->outline_style = style_iter->first.outline_style;
2751 
2752         new_style.d_->alignment_applied = record.first.alignment_applied;
2753         new_style.d_->alignment_id = record.first.alignment_id;
2754         new_style.d_->border_applied = record.first.border_applied;
2755         new_style.d_->border_id = record.first.border_id;
2756         new_style.d_->fill_applied = record.first.fill_applied;
2757         new_style.d_->fill_id = record.first.fill_id;
2758         new_style.d_->font_applied = record.first.font_applied;
2759         new_style.d_->font_id = record.first.font_id;
2760         new_style.d_->number_format_applied = record.first.number_format_applied;
2761         new_style.d_->number_format_id = record.first.number_format_id;
2762     }
2763 
2764     std::size_t record_index = 0;
2765 
2766     for (const auto &record : format_records)
2767     {
2768         stylesheet.format_impls.push_back(format_impl());
2769         auto &new_format = stylesheet.format_impls.back();
2770 
2771         new_format.id = record_index++;
2772         new_format.parent = &stylesheet;
2773 
2774         ++new_format.references;
2775 
2776         new_format.alignment_id = record.first.alignment_id;
2777         new_format.alignment_applied = record.first.alignment_applied;
2778         new_format.border_id = record.first.border_id;
2779         new_format.border_applied = record.first.border_applied;
2780         new_format.fill_id = record.first.fill_id;
2781         new_format.fill_applied = record.first.fill_applied;
2782         new_format.font_id = record.first.font_id;
2783         new_format.font_applied = record.first.font_applied;
2784         new_format.number_format_id = record.first.number_format_id;
2785         new_format.number_format_applied = record.first.number_format_applied;
2786         new_format.protection_id = record.first.protection_id;
2787         new_format.protection_applied = record.first.protection_applied;
2788         new_format.pivot_button_ = record.first.pivot_button_;
2789         new_format.quote_prefix_ = record.first.quote_prefix_;
2790 
2791         set_style_by_xfid(styles, record.second, new_format.style);
2792     }
2793 }
2794 
read_theme()2795 void xlsx_consumer::read_theme()
2796 {
2797     auto workbook_rel = manifest().relationship(path("/"),
2798         relationship_type::office_document);
2799     auto theme_rel = manifest().relationship(workbook_rel.target().path(),
2800         relationship_type::theme);
2801     auto theme_path = manifest().canonicalize({workbook_rel, theme_rel});
2802 
2803     target_.theme(theme());
2804 
2805     if (manifest().has_relationship(theme_path, relationship_type::image))
2806     {
2807         read_part({workbook_rel, theme_rel,
2808             manifest().relationship(theme_path,
2809                 relationship_type::image)});
2810     }
2811 }
2812 
// Handler for the volatile dependencies part.
// The body is empty: nothing is read from the parser and no state is changed.
void xlsx_consumer::read_volatile_dependencies()
{
}
2816 
2817 // Sheet Relationship Target Parts
2818 
// Handler for a worksheet's VML drawings part.
// The body is empty and the worksheet argument is unused (its name is
// commented out), so calling this has no effect.
void xlsx_consumer::read_vml_drawings(worksheet /*ws*/)
{
}
2822 
read_comments(worksheet ws)2823 void xlsx_consumer::read_comments(worksheet ws)
2824 {
2825     std::vector<std::string> authors;
2826 
2827     expect_start_element(qn("spreadsheetml", "comments"), xml::content::complex);
2828     // name space can be ignored
2829     skip_attribute(qn("mc", "Ignorable"));
2830     expect_start_element(qn("spreadsheetml", "authors"), xml::content::complex);
2831 
2832     while (in_element(qn("spreadsheetml", "authors")))
2833     {
2834         expect_start_element(qn("spreadsheetml", "author"), xml::content::simple);
2835         authors.push_back(read_text());
2836         expect_end_element(qn("spreadsheetml", "author"));
2837     }
2838 
2839     expect_end_element(qn("spreadsheetml", "authors"));
2840     expect_start_element(qn("spreadsheetml", "commentList"), xml::content::complex);
2841 
2842     while (in_element(xml::qname(qn("spreadsheetml", "commentList"))))
2843     {
2844         expect_start_element(qn("spreadsheetml", "comment"), xml::content::complex);
2845 
2846         skip_attribute("shapeId");
2847         auto cell_ref = parser().attribute("ref");
2848         auto author_id = parser().attribute<std::size_t>("authorId");
2849 
2850         expect_start_element(qn("spreadsheetml", "text"), xml::content::complex);
2851 
2852         ws.cell(cell_ref).comment(comment(read_rich_text(qn("spreadsheetml", "text")), authors.at(author_id)));
2853 
2854         expect_end_element(qn("spreadsheetml", "text"));
2855 
2856         if (in_element(xml::qname(qn("spreadsheetml", "comment"))))
2857         {
2858             expect_start_element(qn("mc", "AlternateContent"), xml::content::complex);
2859             skip_remaining_content(qn("mc", "AlternateContent"));
2860             expect_end_element(qn("mc", "AlternateContent"));
2861         }
2862 
2863         expect_end_element(qn("spreadsheetml", "comment"));
2864     }
2865 
2866     expect_end_element(qn("spreadsheetml", "commentList"));
2867     expect_end_element(qn("spreadsheetml", "comments"));
2868 }
2869 
read_drawings(worksheet ws,const path & part)2870 void xlsx_consumer::read_drawings(worksheet ws, const path &part)
2871 {
2872     auto images = manifest().relationships(part, relationship_type::image);
2873 
2874     auto sd = drawing::spreadsheet_drawing(parser());
2875 
2876     for (const auto &image_rel_id : sd.get_embed_ids())
2877     {
2878         auto image_rel = std::find_if(images.begin(), images.end(),
2879             [&](const relationship &r) { return r.id() == image_rel_id; });
2880 
2881         if (image_rel != images.end())
2882         {
2883             const auto url = image_rel->target().path().resolve(part.parent());
2884 
2885             read_image(url);
2886         }
2887     }
2888 
2889     ws.d_->drawing_ = sd;
2890 }
2891 
2892 // Unknown Parts
2893 
// Handler for parts of unknown type.
// The body is empty: calling this has no effect.
void xlsx_consumer::read_unknown_parts()
{
}
2897 
// Handler for relationships of unknown type.
// The body is empty: calling this has no effect.
void xlsx_consumer::read_unknown_relationships()
{
}
2901 
read_image(const xlnt::path & image_path)2902 void xlsx_consumer::read_image(const xlnt::path &image_path)
2903 {
2904     auto image_streambuf = archive_->open(image_path);
2905     vector_ostreambuf buffer(target_.d_->images_[image_path.string()]);
2906     std::ostream out_stream(&buffer);
2907     out_stream << image_streambuf.get();
2908 }
2909 
read_text()2910 std::string xlsx_consumer::read_text()
2911 {
2912     auto text = std::string();
2913 
2914     while (parser().peek() == xml::parser::event_type::characters)
2915     {
2916         parser().next_expect(xml::parser::event_type::characters);
2917         text.append(parser().value());
2918     }
2919 
2920     return text;
2921 }
2922 
read_variant()2923 variant xlsx_consumer::read_variant()
2924 {
2925     auto value = variant(read_text());
2926 
2927     if (in_element(stack_.back()))
2928     {
2929         auto element = expect_start_element(xml::content::mixed);
2930         auto text = read_text();
2931 
2932         if (element == qn("vt", "lpwstr") || element == qn("vt", "lpstr"))
2933         {
2934             value = variant(text);
2935         }
2936         if (element == qn("vt", "i4"))
2937         {
2938             value = variant(std::stoi(text));
2939         }
2940         if (element == qn("vt", "bool"))
2941         {
2942             value = variant(is_true(text));
2943         }
2944         else if (element == qn("vt", "vector"))
2945         {
2946             auto size = parser().attribute<std::size_t>("size");
2947             auto base_type = parser().attribute("baseType");
2948 
2949             std::vector<variant> vector;
2950 
2951             for (auto i = std::size_t(0); i < size; ++i)
2952             {
2953                 if (base_type == "variant")
2954                 {
2955                     expect_start_element(qn("vt", "variant"), xml::content::complex);
2956                 }
2957 
2958                 vector.push_back(read_variant());
2959 
2960                 if (base_type == "variant")
2961                 {
2962                     expect_end_element(qn("vt", "variant"));
2963                     read_text();
2964                 }
2965             }
2966 
2967             value = variant(vector);
2968         }
2969 
2970         expect_end_element(element);
2971         read_text();
2972     }
2973 
2974     return value;
2975 }
2976 
skip_attributes(const std::vector<std::string> & names)2977 void xlsx_consumer::skip_attributes(const std::vector<std::string> &names)
2978 {
2979     for (const auto &name : names)
2980     {
2981         if (parser().attribute_present(name))
2982         {
2983             parser().attribute(name);
2984         }
2985     }
2986 }
2987 
skip_attributes(const std::vector<xml::qname> & names)2988 void xlsx_consumer::skip_attributes(const std::vector<xml::qname> &names)
2989 {
2990     for (const auto &name : names)
2991     {
2992         if (parser().attribute_present(name))
2993         {
2994             parser().attribute(name);
2995         }
2996     }
2997 }
2998 
skip_attributes()2999 void xlsx_consumer::skip_attributes()
3000 {
3001     parser().attribute_map();
3002 }
3003 
skip_attribute(const xml::qname & name)3004 void xlsx_consumer::skip_attribute(const xml::qname &name)
3005 {
3006     if (parser().attribute_present(name))
3007     {
3008         parser().attribute(name);
3009     }
3010 }
3011 
skip_attribute(const std::string & name)3012 void xlsx_consumer::skip_attribute(const std::string &name)
3013 {
3014     if (parser().attribute_present(name))
3015     {
3016         parser().attribute(name);
3017     }
3018 }
3019 
skip_remaining_content(const xml::qname & name)3020 void xlsx_consumer::skip_remaining_content(const xml::qname &name)
3021 {
3022     // start by assuming we've already parsed the opening tag
3023 
3024     skip_attributes();
3025     read_text();
3026 
3027     // continue until the closing tag is reached
3028     while (in_element(name))
3029     {
3030         auto child_element = expect_start_element(xml::content::mixed);
3031         skip_remaining_content(child_element);
3032         expect_end_element(child_element);
3033         read_text(); // trailing character content (usually whitespace)
3034     }
3035 }
3036 
in_element(const xml::qname & name)3037 bool xlsx_consumer::in_element(const xml::qname &name)
3038 {
3039     return parser().peek() != xml::parser::event_type::end_element
3040         && stack_.back() == name;
3041 }
3042 
expect_start_element(xml::content content)3043 xml::qname xlsx_consumer::expect_start_element(xml::content content)
3044 {
3045     parser().next_expect(xml::parser::event_type::start_element);
3046     parser().content(content);
3047     stack_.push_back(parser().qname());
3048 
3049     const auto xml_space = qn("xml", "space");
3050     preserve_space_ = parser().attribute_present(xml_space) ? parser().attribute(xml_space) == "preserve" : false;
3051 
3052     return stack_.back();
3053 }
3054 
expect_start_element(const xml::qname & name,xml::content content)3055 void xlsx_consumer::expect_start_element(const xml::qname &name, xml::content content)
3056 {
3057     parser().next_expect(xml::parser::event_type::start_element, name);
3058     parser().content(content);
3059     stack_.push_back(name);
3060 
3061     const auto xml_space = qn("xml", "space");
3062     preserve_space_ = parser().attribute_present(xml_space) ? parser().attribute(xml_space) == "preserve" : false;
3063 }
3064 
expect_end_element(const xml::qname & name)3065 void xlsx_consumer::expect_end_element(const xml::qname &name)
3066 {
3067     parser().attribute_map();
3068     parser().next_expect(xml::parser::event_type::end_element, name);
3069     stack_.pop_back();
3070 }
3071 
unexpected_element(const xml::qname & name)3072 void xlsx_consumer::unexpected_element(const xml::qname &name)
3073 {
3074 #ifdef THROW_ON_INVALID_XML
3075     throw xlnt::exception(name.string());
3076 #else
3077     skip_remaining_content(name);
3078 #endif
3079 }
3080 
read_rich_text(const xml::qname & parent)3081 rich_text xlsx_consumer::read_rich_text(const xml::qname &parent)
3082 {
3083     const auto &xmlns = parent.namespace_();
3084     rich_text t;
3085 
3086     while (in_element(parent))
3087     {
3088         auto text_element = expect_start_element(xml::content::mixed);
3089         const auto xml_space = qn("xml", "space");
3090         const auto preserve_space = parser().attribute_present(xml_space)
3091             ? parser().attribute(xml_space) == "preserve"
3092             : false;
3093         skip_attributes();
3094         auto text = read_text();
3095 
3096         if (text_element == xml::qname(xmlns, "t"))
3097         {
3098             t.plain_text(text, preserve_space);
3099         }
3100         else if (text_element == xml::qname(xmlns, "r"))
3101         {
3102             rich_text_run run;
3103             run.preserve_space = preserve_space;
3104 
3105             while (in_element(xml::qname(xmlns, "r")))
3106             {
3107                 auto run_element = expect_start_element(xml::content::mixed);
3108                 auto run_text = read_text();
3109 
3110                 if (run_element == xml::qname(xmlns, "rPr"))
3111                 {
3112                     run.second = xlnt::font();
3113 
3114                     while (in_element(xml::qname(xmlns, "rPr")))
3115                     {
3116                         auto current_run_property_element = expect_start_element(xml::content::simple);
3117 
3118                         if (current_run_property_element == xml::qname(xmlns, "sz"))
3119                         {
3120                             run.second.get().size(converter_.deserialise(parser().attribute("val")));
3121                         }
3122                         else if (current_run_property_element == xml::qname(xmlns, "rFont"))
3123                         {
3124                             run.second.get().name(parser().attribute("val"));
3125                         }
3126                         else if (current_run_property_element == xml::qname(xmlns, "color"))
3127                         {
3128                             run.second.get().color(read_color());
3129                         }
3130                         else if (current_run_property_element == xml::qname(xmlns, "family"))
3131                         {
3132                             run.second.get().family(parser().attribute<std::size_t>("val"));
3133                         }
3134                         else if (current_run_property_element == xml::qname(xmlns, "charset"))
3135                         {
3136                             run.second.get().charset(parser().attribute<std::size_t>("val"));
3137                         }
3138                         else if (current_run_property_element == xml::qname(xmlns, "scheme"))
3139                         {
3140                             run.second.get().scheme(parser().attribute("val"));
3141                         }
3142                         else if (current_run_property_element == xml::qname(xmlns, "b"))
3143                         {
3144                             run.second.get().bold(parser().attribute_present("val")
3145                                     ? is_true(parser().attribute("val"))
3146                                     : true);
3147                         }
3148                         else if (current_run_property_element == xml::qname(xmlns, "i"))
3149                         {
3150                             run.second.get().italic(parser().attribute_present("val")
3151                                     ? is_true(parser().attribute("val"))
3152                                     : true);
3153                         }
3154                         else if (current_run_property_element == xml::qname(xmlns, "u"))
3155                         {
3156                             if (parser().attribute_present("val"))
3157                             {
3158                                 run.second.get().underline(parser().attribute<font::underline_style>("val"));
3159                             }
3160                             else
3161                             {
3162                                 run.second.get().underline(font::underline_style::single);
3163                             }
3164                         }
3165                         else if (current_run_property_element == xml::qname(xmlns, "strike"))
3166                         {
3167                             run.second.get().strikethrough(parser().attribute_present("val")
3168                                     ? is_true(parser().attribute("val"))
3169                                     : true);
3170                         }
3171                         else
3172                         {
3173                             unexpected_element(current_run_property_element);
3174                         }
3175 
3176                         expect_end_element(current_run_property_element);
3177                         read_text();
3178                     }
3179                 }
3180                 else if (run_element == xml::qname(xmlns, "t"))
3181                 {
3182                     run.first = run_text;
3183                 }
3184                 else
3185                 {
3186                     unexpected_element(run_element);
3187                 }
3188 
3189                 read_text();
3190                 expect_end_element(run_element);
3191                 read_text();
3192             }
3193 
3194             t.add_run(run);
3195         }
3196         else if (text_element == xml::qname(xmlns, "rPh"))
3197         {
3198             phonetic_run pr;
3199             pr.start = parser().attribute<std::uint32_t>("sb");
3200             pr.end = parser().attribute<std::uint32_t>("eb");
3201 
3202             expect_start_element(xml::qname(xmlns, "t"), xml::content::simple);
3203             pr.text = read_text();
3204 
3205             if (parser().attribute_present(xml_space))
3206             {
3207                 pr.preserve_space = parser().attribute(xml_space) == "preserve";
3208             }
3209 
3210             expect_end_element(xml::qname(xmlns, "t"));
3211 
3212             t.add_phonetic_run(pr);
3213         }
3214         else if (text_element == xml::qname(xmlns, "phoneticPr"))
3215         {
3216             phonetic_pr ph(parser().attribute<phonetic_pr::font_id_t>("fontId"));
3217             if (parser().attribute_present("type"))
3218             {
3219                 ph.type(phonetic_pr::type_from_string(parser().attribute("type")));
3220             }
3221             if (parser().attribute_present("alignment"))
3222             {
3223                 ph.alignment(phonetic_pr::alignment_from_string(parser().attribute("alignment")));
3224             }
3225             t.phonetic_properties(ph);
3226         }
3227         else
3228         {
3229             unexpected_element(text_element);
3230         }
3231 
3232         read_text();
3233         expect_end_element(text_element);
3234     }
3235 
3236     return t;
3237 }
3238 
read_color()3239 xlnt::color xlsx_consumer::read_color()
3240 {
3241     xlnt::color result;
3242 
3243     if (parser().attribute_present("auto") && is_true(parser().attribute("auto")))
3244     {
3245         result.auto_(true);
3246         return result;
3247     }
3248 
3249     if (parser().attribute_present("rgb"))
3250     {
3251         result = xlnt::rgb_color(parser().attribute("rgb"));
3252     }
3253     else if (parser().attribute_present("theme"))
3254     {
3255         result = xlnt::theme_color(parser().attribute<std::size_t>("theme"));
3256     }
3257     else if (parser().attribute_present("indexed"))
3258     {
3259         result = xlnt::indexed_color(parser().attribute<std::size_t>("indexed"));
3260     }
3261 
3262     if (parser().attribute_present("tint"))
3263     {
3264         result.tint(converter_.deserialise(parser().attribute("tint")));
3265     }
3266 
3267     return result;
3268 }
3269 
manifest()3270 manifest &xlsx_consumer::manifest()
3271 {
3272     return target_.manifest();
3273 }
3274 
3275 } // namespace detail
3276 } // namespace xlnt
3277