1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <orcusfiltersimpl.hxx>
11 #include <orcusinterface.hxx>
12 #include <orcusxml.hxx>
13 #include <document.hxx>
14 #include <tokenarray.hxx>
15 
16 #include <vcl/weld.hxx>
17 #include <ucbhelper/content.hxx>
18 #include <sal/log.hxx>
19 #include <osl/file.hxx>
20 
21 #include <orcus/xml_structure_tree.hpp>
22 #include <orcus/xml_namespace.hpp>
23 #include <orcus/orcus_xml.hpp>
24 #include <orcus/sax_parser_base.hpp>
25 #include <orcus/stream.hpp>
26 
27 #include <com/sun/star/io/XInputStream.hpp>
28 #include <comphelper/processfactory.hxx>
29 
30 #include <string>
31 #include <sstream>
32 
33 namespace com::sun::star::ucb { class XCommandEnvironment; }
34 
35 #define BUFFER_SIZE 4096
36 
37 using namespace com::sun::star;
38 
39 namespace {
40 
setUserDataToEntry(weld::TreeView & rControl,const weld::TreeIter & rEntry,ScOrcusXMLTreeParam::UserDataStoreType & rStore,ScOrcusXMLTreeParam::EntryType eType)41 ScOrcusXMLTreeParam::EntryData& setUserDataToEntry(weld::TreeView& rControl,
42     const weld::TreeIter& rEntry, ScOrcusXMLTreeParam::UserDataStoreType& rStore, ScOrcusXMLTreeParam::EntryType eType)
43 {
44     rStore.push_back(std::make_unique<ScOrcusXMLTreeParam::EntryData>(eType));
45     rControl.set_id(rEntry, OUString::number(reinterpret_cast<sal_Int64>(rStore.back().get())));
46     return *rStore.back();
47 }
48 
setEntityNameToUserData(ScOrcusXMLTreeParam::EntryData & rEntryData,const orcus::xml_structure_tree::entity_name & entity,const orcus::xml_structure_tree::walker & walker)49 void setEntityNameToUserData(
50     ScOrcusXMLTreeParam::EntryData& rEntryData,
51     const orcus::xml_structure_tree::entity_name& entity, const orcus::xml_structure_tree::walker& walker)
52 {
53     rEntryData.mnNamespaceID = walker.get_xmlns_index(entity.ns);
54 }
55 
toString(const orcus::xml_structure_tree::entity_name & entity,const orcus::xml_structure_tree::walker & walker)56 OUString toString(const orcus::xml_structure_tree::entity_name& entity, const orcus::xml_structure_tree::walker& walker)
57 {
58     OUStringBuffer aBuf;
59     if (entity.ns)
60     {
61         // Namespace exists.  Use the short version of the xml namespace name for display.
62         std::string aShortName = walker.get_xmlns_short_name(entity.ns);
63         aBuf.appendAscii(aShortName.c_str());
64         aBuf.append(':');
65     }
66     aBuf.append(OUString(entity.name.get(), entity.name.size(), RTL_TEXTENCODING_UTF8));
67     return aBuf.makeStringAndClear();
68 }
69 
populateTree(weld::TreeView & rTreeCtrl,orcus::xml_structure_tree::walker & rWalker,const orcus::xml_structure_tree::entity_name & rElemName,bool bRepeat,const weld::TreeIter * pParent,ScOrcusXMLTreeParam & rParam)70 void populateTree(
71    weld::TreeView& rTreeCtrl, orcus::xml_structure_tree::walker& rWalker,
72    const orcus::xml_structure_tree::entity_name& rElemName, bool bRepeat,
73    const weld::TreeIter* pParent, ScOrcusXMLTreeParam& rParam)
74 {
75     OUString sEntry(toString(rElemName, rWalker));
76     std::unique_ptr<weld::TreeIter> xEntry(rTreeCtrl.make_iterator());
77     rTreeCtrl.insert(pParent, -1, &sEntry, nullptr, nullptr, nullptr, false, xEntry.get());
78     rTreeCtrl.set_image(*xEntry, rParam.maImgElementDefault, -1);
79 
80     ScOrcusXMLTreeParam::EntryData& rEntryData = setUserDataToEntry(rTreeCtrl,
81         *xEntry, rParam.m_UserDataStore,
82         bRepeat ? ScOrcusXMLTreeParam::ElementRepeat : ScOrcusXMLTreeParam::ElementDefault);
83 
84     setEntityNameToUserData(rEntryData, rElemName, rWalker);
85 
86     if (bRepeat)
87     {
88         // Recurring elements use different icon.
89        rTreeCtrl.set_image(*xEntry, rParam.maImgElementRepeat, -1);
90     }
91 
92     orcus::xml_structure_tree::entity_names_type aNames = rWalker.get_attributes();
93 
94     // Insert attributes.
95     for (const orcus::xml_structure_tree::entity_name& rAttrName : aNames)
96     {
97         OUString sAttr(toString(rAttrName, rWalker));
98         std::unique_ptr<weld::TreeIter> xAttr(rTreeCtrl.make_iterator());
99         rTreeCtrl.insert(xEntry.get(), -1, &sAttr, nullptr, nullptr, nullptr, false, xAttr.get());
100 
101         ScOrcusXMLTreeParam::EntryData& rAttrData =
102             setUserDataToEntry(rTreeCtrl, *xAttr, rParam.m_UserDataStore, ScOrcusXMLTreeParam::Attribute);
103         setEntityNameToUserData(rAttrData, rAttrName, rWalker);
104 
105         rTreeCtrl.set_image(*xAttr, rParam.maImgAttribute, -1);
106     }
107 
108     aNames = rWalker.get_children();
109 
110     // Non-leaf if it has child elements, leaf otherwise.
111     rEntryData.mbLeafNode = aNames.empty();
112 
113     // Insert child elements recursively.
114     for (const auto& rName : aNames)
115     {
116         orcus::xml_structure_tree::element aElem = rWalker.descend(rName);
117         populateTree(rTreeCtrl, rWalker, rName, aElem.repeat, xEntry.get(), rParam);
118         rWalker.ascend();
119     }
120 }
121 
122 class TreeUpdateSwitch
123 {
124     weld::TreeView& mrTreeCtrl;
125 public:
TreeUpdateSwitch(weld::TreeView & rTreeCtrl)126     explicit TreeUpdateSwitch(weld::TreeView& rTreeCtrl) : mrTreeCtrl(rTreeCtrl)
127     {
128         mrTreeCtrl.freeze();
129     }
130 
~TreeUpdateSwitch()131     ~TreeUpdateSwitch()
132     {
133         mrTreeCtrl.thaw();
134     }
135 };
136 
loadContentFromURL(const OUString & rURL,std::string & rStrm)137 void loadContentFromURL(const OUString& rURL, std::string& rStrm)
138 {
139     ucbhelper::Content aContent(
140         rURL, uno::Reference<ucb::XCommandEnvironment>(), comphelper::getProcessComponentContext());
141     uno::Reference<io::XInputStream> xStrm = aContent.openStream();
142 
143     std::ostringstream aStrmBuf;
144     uno::Sequence<sal_Int8> aBytes;
145     size_t nBytesRead = 0;
146     do
147     {
148         nBytesRead = xStrm->readBytes(aBytes, BUFFER_SIZE);
149         const sal_Int8* p = aBytes.getConstArray();
150         aStrmBuf << std::string(p, p + nBytesRead);
151     }
152     while (nBytesRead == BUFFER_SIZE);
153 
154     rStrm = aStrmBuf.str();
155 }
156 
157 }
158 
ScOrcusXMLContextImpl(ScDocument & rDoc,const OUString & rPath)159 ScOrcusXMLContextImpl::ScOrcusXMLContextImpl(ScDocument& rDoc, const OUString& rPath) :
160     ScOrcusXMLContext(), mrDoc(rDoc), maPath(rPath) {}
161 
~ScOrcusXMLContextImpl()162 ScOrcusXMLContextImpl::~ScOrcusXMLContextImpl() {}
163 
loadXMLStructure(weld::TreeView & rTreeCtrl,ScOrcusXMLTreeParam & rParam)164 void ScOrcusXMLContextImpl::loadXMLStructure(weld::TreeView& rTreeCtrl, ScOrcusXMLTreeParam& rParam)
165 {
166     rParam.m_UserDataStore.clear();
167 
168     std::string aStrm;
169     loadContentFromURL(maPath, aStrm);
170 
171     if (aStrm.empty())
172         return;
173 
174     orcus::xmlns_context cxt = maNsRepo.create_context();
175     orcus::xml_structure_tree aXmlTree(cxt);
176     try
177     {
178         aXmlTree.parse(&aStrm[0], aStrm.size());
179 
180         TreeUpdateSwitch aSwitch(rTreeCtrl);
181         rTreeCtrl.clear();
182 
183         orcus::xml_structure_tree::walker aWalker = aXmlTree.get_walker();
184 
185         // Root element.
186         orcus::xml_structure_tree::element aElem = aWalker.root();
187         populateTree(rTreeCtrl, aWalker, aElem.name, aElem.repeat, nullptr, rParam);
188     }
189     catch (const orcus::sax::malformed_xml_error& e)
190     {
191         SAL_WARN("sc.orcus", "Malformed XML error: " << e.what());
192     }
193     catch (const std::exception& e)
194     {
195         SAL_WARN("sc.orcus", "parsing failed with an unknown error " << e.what());
196     }
197 
198     rTreeCtrl.all_foreach([&rTreeCtrl](weld::TreeIter& rEntry){
199         rTreeCtrl.expand_row(rEntry);
200         return false;
201     });
202 }
203 
204 namespace {
205 
206 class SetNamespaceAlias
207 {
208     orcus::orcus_xml& mrFilter;
209     orcus::xmlns_repository& mrNsRepo;
210 public:
SetNamespaceAlias(orcus::orcus_xml & filter,orcus::xmlns_repository & repo)211     SetNamespaceAlias(orcus::orcus_xml& filter, orcus::xmlns_repository& repo) :
212         mrFilter(filter), mrNsRepo(repo) {}
213 
operator ()(size_t index)214     void operator() (size_t index)
215     {
216         orcus::xmlns_id_t nsid = mrNsRepo.get_identifier(index);
217         if (nsid == orcus::XMLNS_UNKNOWN_ID)
218             return;
219 
220         std::string alias = mrNsRepo.get_short_name(index);
221         mrFilter.set_namespace_alias(alias.c_str(), nsid);
222     }
223 };
224 
225 }
226 
importXML(const ScOrcusImportXMLParam & rParam)227 void ScOrcusXMLContextImpl::importXML(const ScOrcusImportXMLParam& rParam)
228 {
229     ScOrcusFactory aFactory(mrDoc, true);
230 
231     OUString aSysPath;
232     if (osl::FileBase::getSystemPathFromFileURL(maPath, aSysPath) != osl::FileBase::E_None)
233         return;
234 
235     OString aOSysPath = OUStringToOString(aSysPath, RTL_TEXTENCODING_UTF8);
236     const char* path = aOSysPath.getStr();
237 
238     try
239     {
240         orcus::orcus_xml filter(maNsRepo, &aFactory, nullptr);
241 
242         // Define all used namespaces.
243         std::for_each(rParam.maNamespaces.begin(), rParam.maNamespaces.end(), SetNamespaceAlias(filter, maNsRepo));
244 
245         // Set cell links.
246         for (const ScOrcusImportXMLParam::CellLink& rLink : rParam.maCellLinks)
247         {
248             OUString aTabName;
249             mrDoc.GetName(rLink.maPos.Tab(), aTabName);
250             filter.set_cell_link(
251                 rLink.maPath.getStr(),
252                 OUStringToOString(aTabName, RTL_TEXTENCODING_UTF8).getStr(),
253                 rLink.maPos.Row(), rLink.maPos.Col());
254         }
255 
256         // Set range links.
257         for (const ScOrcusImportXMLParam::RangeLink& rLink : rParam.maRangeLinks)
258         {
259             OUString aTabName;
260             mrDoc.GetName(rLink.maPos.Tab(), aTabName);
261             filter.start_range(
262                 OUStringToOString(aTabName, RTL_TEXTENCODING_UTF8).getStr(),
263                 rLink.maPos.Row(), rLink.maPos.Col());
264 
265             std::for_each(rLink.maFieldPaths.begin(), rLink.maFieldPaths.end(),
266                 [&filter](const OString& rFieldPath)
267                 {
268                     filter.append_field_link(rFieldPath.getStr(), orcus::pstring());
269                 }
270             );
271 
272             std::for_each(rLink.maRowGroups.begin(), rLink.maRowGroups.end(),
273                 [&filter] (const OString& rRowGroup)
274                 {
275                     filter.set_range_row_group(rRowGroup.getStr());
276                 }
277             );
278 
279             filter.commit_range();
280         }
281 
282         orcus::file_content content(path);
283         filter.read_stream(content.data(), content.size());
284 
285         aFactory.finalize();
286     }
287     catch (const std::exception&)
288     {
289     }
290 }
291 
292 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
293