1 // This is brl/bbas/bxml/bxml_read.cxx
2 //:
3 // \file
4 // \author Matt Leotta
5 // \date   October 5, 2006
6 
7 #include <iostream>
8 #include <deque>
9 #include <utility>
10 #include <fstream>
11 #include "bxml_read.h"
12 #ifdef _MSC_VER
13 #  include "vcl_msvc_warnings.h"
14 #endif
15 #include <cassert>
16 #include "vul/vul_file.h"
17 #ifdef WIN32
18  #define _LIB
19 #endif
20 #include <expatpp.h>
21 
22 // anonymous namespace
23 namespace {
24 
25 class bxml_expat_parser : public expatpp
26 {
27  public:
bxml_expat_parser(bool online=false)28   bxml_expat_parser(bool online = false) : online_mode_(online) {}
29   void startElement(const XML_Char* name, const XML_Char** atts) override;
30   void endElement(const XML_Char* name) override;
31   void charData(const XML_Char*, int len) override;
32   void xmlDecl( const XML_Char *version,
33                         const XML_Char *encoding,
34                         int            standalone) override;
35 
document() const36   bxml_document document() const { return document_; }
37 
38   bool pop_complete_data(bxml_data_sptr& data, unsigned int& depth);
39 
40  private:
41   bool online_mode_;
42   std::vector<bxml_data_sptr> stack_;
43   std::deque<std::pair<bxml_data_sptr,unsigned int> > complete_;
44   bxml_document document_;
45 };
46 
pop_complete_data(bxml_data_sptr & data,unsigned int & depth)47 bool bxml_expat_parser::pop_complete_data(bxml_data_sptr& data, unsigned int& depth)
48 {
49   if (complete_.empty())
50     return false;
51 
52   data = complete_.front().first;
53   depth = complete_.front().second;
54   complete_.pop_front();
55   return true;
56 }
57 
58 
59 //: Handle the start of elements
startElement(const XML_Char * name,const XML_Char ** atts)60 void bxml_expat_parser::startElement(const XML_Char* name, const XML_Char** atts)
61 {
62   bxml_element* element = new bxml_element(name);
63   bxml_data_sptr data(element);
64   // set all the attributes
65   for (int i=0; atts[i]; i+=2) {
66     element->set_attribute(atts[i],atts[i+1]);
67   }
68 
69   // add this element to the current element or document
70   if (stack_.empty()) {
71     if (!online_mode_) {
72       document_.set_root_element(data);
73       stack_.push_back(data);
74     }
75     else
76       stack_.emplace_back(nullptr);
77   }
78   else{
79     if (stack_.back().ptr()) {
80       auto* parent = static_cast<bxml_element*>(stack_.back().ptr());
81       parent->append_data(data);
82     }
83     stack_.push_back(data);
84   }
85 }
86 
87 
88 //: Handle the start of elements
endElement(const XML_Char * name)89 void bxml_expat_parser::endElement(const XML_Char* name)
90 {
91   if (stack_.back().ptr()) {
92     assert(static_cast<bxml_element*>(stack_.back().ptr())->name() == std::string(name));
93     complete_.emplace_back(stack_.back(),stack_.size()-1);
94   }
95   stack_.pop_back();
96 }
97 
98 
99 //: Handle character data
charData(const XML_Char * text,int len)100 void bxml_expat_parser::charData(const XML_Char* text, int len)
101 {
102   assert(!stack_.empty());
103   if (stack_.back().ptr()) {
104     auto* parent = static_cast<bxml_element*>(stack_.back().ptr());
105     parent->append_text(std::string(text,len));
106   }
107 }
108 
109 
110 //: Handle the XML declaration
xmlDecl(const XML_Char * version,const XML_Char * encoding,int standalone)111 void bxml_expat_parser::xmlDecl( const XML_Char *version,
112                                  const XML_Char *encoding,
113                                  int            standalone)
114 {
115   document_.set_version(version);
116   document_.set_encoding(encoding);
117   document_.set_standalone(standalone != 0);
118 }
119 
120 }; // end anonymous namespace
121 
122 
123 //: Read the entire contents of \p filepath into an XML document class
bxml_read(const std::string & filepath)124 bxml_document bxml_read(const std::string& filepath)
125 {
126   if (!vul_file::exists(filepath))
127     std::cerr<< "In bxml_read: " << vul_file::get_cwd() << filepath << " does not exist\n";
128   std::ifstream file(filepath.c_str());
129   return bxml_read(file);
130 }
131 
132 
133 //: Read the entire data stream \p is into an XML document class
bxml_read(std::istream & is)134 bxml_document bxml_read(std::istream& is)
135 {
136   bxml_expat_parser parser;
137 
138   char buf[4096];
139   //char buf[9096];
140   int done;
141 
142   while (is.good()) {
143     is.get(buf,sizeof(buf),0);
144     unsigned int n = is.gcount();
145 
146     done = (n+1 < sizeof(buf)) ? 1 : 0;
147 
148     if (parser.XML_Parse(buf,n,done) != XML_STATUS_OK ) {
149       std::cerr << "Error parsing\n";
150       break;
151     }
152   }
153   return parser.document();
154 }
155 
156 
157 class bxml_stream_read::pimpl
158 {
159  public:
pimpl(unsigned int max_depth)160   pimpl(unsigned int max_depth) : parser(true), depth(max_depth) {}
161 
162   bxml_expat_parser parser;
163   unsigned int depth;
164 };
165 
166 //: Constructor
bxml_stream_read(int max_depth)167 bxml_stream_read::bxml_stream_read(int max_depth)
168   : p_(new pimpl(max_depth))
169 {
170 }
171 
172 //: Destructor
~bxml_stream_read()173 bxml_stream_read::~bxml_stream_read()
174 {
175   delete p_;
176 }
177 
178 
179 //: Reset the state of the reader
reset()180 void bxml_stream_read::reset()
181 {
182   if (p_) {
183     unsigned int depth = p_->depth;
184     delete p_;
185     p_ = new pimpl(depth);
186   }
187 }
188 
189 
190 //: Read the next element
191 bxml_data_sptr
next_element(std::istream & is,unsigned int & depth)192 bxml_stream_read::next_element(std::istream& is, unsigned int& depth)
193 {
194   char buf[4096];
195   int done = 0;
196 
197   bxml_data_sptr data = nullptr;
198   depth = 0;
199   while ( p_->parser.pop_complete_data(data, depth) )
200     if (depth <= p_->depth)
201       return data;
202 
203   while (is.good()){
204     is.get(buf,sizeof(buf),0);
205     int n = is.gcount();
206     if (p_->parser.XML_Parse(buf,n,done) != XML_STATUS_OK ) {
207       std::cerr << "Error parsing\n";
208       break;
209     }
210 
211     while ( p_->parser.pop_complete_data(data, depth) )
212       if (depth <= p_->depth)
213         return data;
214   }
215   return nullptr;
216 }
217