1 #ifndef OSMIUM_RELATIONS_COLLECTOR_HPP
2 #define OSMIUM_RELATIONS_COLLECTOR_HPP
3 
4 /*
5 
6 This file is part of Osmium (https://osmcode.org/libosmium).
7 
8 Copyright 2013-2021 Jochen Topf <jochen@topf.org> and others (see README).
9 
10 Boost Software License - Version 1.0 - August 17th, 2003
11 
12 Permission is hereby granted, free of charge, to any person or organization
13 obtaining a copy of the software and accompanying documentation covered by
14 this license (the "Software") to use, reproduce, display, distribute,
15 execute, and transmit the Software, and to prepare derivative works of the
16 Software, and to permit third-parties to whom the Software is furnished to
17 do so, all subject to the following:
18 
19 The copyright notices in the Software and this entire statement, including
20 the above license grant, this restriction and the following disclaimer,
21 must be included in all copies of the Software, in whole or in part, and
22 all derivative works of the Software, unless such copies or derivative
23 works are solely in the form of machine-executable object code generated by
24 a source language processor.
25 
26 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
29 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
30 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
31 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
32 DEALINGS IN THE SOFTWARE.
33 
34 */
35 
36 #include <osmium/handler.hpp>
37 #include <osmium/handler/check_order.hpp>
38 #include <osmium/memory/buffer.hpp>
39 #include <osmium/osm/item_type.hpp>
40 #include <osmium/osm/object.hpp>
41 #include <osmium/osm/relation.hpp>
42 #include <osmium/osm/types.hpp>
43 #include <osmium/relations/detail/member_meta.hpp>
44 #include <osmium/relations/detail/relation_meta.hpp>
45 #include <osmium/util/iterator.hpp>
46 #include <osmium/visitor.hpp>
47 
48 #include <algorithm>
49 #include <array>
50 #include <cassert>
51 #include <cstddef>
52 #include <cstdint>
53 #include <functional>
54 #include <iomanip>
55 #include <iostream>
56 #include <utility>
57 #include <vector>
58 
59 namespace osmium {
60 
61     class Node;
62     class Way;
63 
64     /**
65      * @brief Code related to the assembly of OSM relations
66      */
67     namespace relations {
68 
69         /**
70          * The Collector class collects members of a relation. This is a
71          * generic base class that can be used to assemble all kinds of
72          * relations. It has numerous hooks you can implement in derived
73          * classes to customize its behaviour.
74          *
75          * The collector provides two handlers (HandlerPass1 and HandlerPass2)
76          * for a first and second pass through an input file, respectively. In
77          * the first pass all relations we are interested in are stored in
78          * RelationMeta objects in the m_relations vector. All members we are
79          * interested in are stored in MemberMeta objects in the m_member_meta
80          * vectors. The MemberMeta objects also store the information where the
81          * relations containing those members are to be found.
82          *
83          * Later the m_member_meta vectors are sorted according to the member
84          * ids so that a binary search (with std::equal_range) can be used in
85          * the second pass to find the parent relations for each node, way, or
86          * relation coming along. The member objects are stored together with
87          * their relation and once a relation is complete the
88          * complete_relation() method is called which you must overwrite in a
89          * derived class of Collector.
90          *
91          * @tparam TCollector Derived class of this class.
92          * @tparam TNodes Are we interested in member nodes?
93          * @tparam TWays Are we interested in member ways?
94          * @tparam TRelations Are we interested in member relations?
95          * @pre The Ids of all objects must be unique in the input data.
96          */
97         template <typename TCollector, bool TNodes, bool TWays, bool TRelations>
98         class Collector {
99 
100             /**
101              * This is the handler class for the first pass of the Collector.
102              */
103             class HandlerPass1 : public osmium::handler::Handler {
104 
105                 TCollector& m_collector;
106 
107             public:
108 
HandlerPass1(TCollector & collector)109                 explicit HandlerPass1(TCollector& collector) noexcept :
110                     m_collector(collector) {
111                 }
112 
relation(const osmium::Relation & relation)113                 void relation(const osmium::Relation& relation) {
114                     if (m_collector.keep_relation(relation)) {
115                         m_collector.add_relation(relation);
116                     }
117                 }
118 
119             }; // class HandlerPass1
120 
121         public:
122 
123             /**
124              * This is the handler class for the second pass of the Collector.
125              */
126             class HandlerPass2 : public osmium::handler::Handler {
127 
128                 osmium::handler::CheckOrder m_check_order;
129                 TCollector& m_collector;
130 
131             public:
132 
HandlerPass2(TCollector & collector)133                 explicit HandlerPass2(TCollector& collector) noexcept :
134                     m_collector(collector) {
135                 }
136 
node(const osmium::Node & node)137                 void node(const osmium::Node& node) {
138                     if (TNodes) {
139                         m_check_order.node(node);
140                         if (!m_collector.find_and_add_object(node)) {
141                             m_collector.node_not_in_any_relation(node);
142                         }
143                     }
144                 }
145 
way(const osmium::Way & way)146                 void way(const osmium::Way& way) {
147                     if (TWays) {
148                         m_check_order.way(way);
149                         if (!m_collector.find_and_add_object(way)) {
150                             m_collector.way_not_in_any_relation(way);
151                         }
152                     }
153                 }
154 
relation(const osmium::Relation & relation)155                 void relation(const osmium::Relation& relation) {
156                     if (TRelations) {
157                         m_check_order.relation(relation);
158                         if (!m_collector.find_and_add_object(relation)) {
159                             m_collector.relation_not_in_any_relation(relation);
160                         }
161                     }
162                 }
163 
flush()164                 void flush() {
165                     m_collector.flush();
166                 }
167 
168             }; // class HandlerPass2
169 
170         private:
171 
172             HandlerPass2 m_handler_pass2;
173 
174             // All relations we are interested in will be kept in this buffer
175             osmium::memory::Buffer m_relations_buffer;
176 
177             // All members we are interested in will be kept in this buffer
178             osmium::memory::Buffer m_members_buffer;
179 
180             /// Vector with all relations we are interested in
181             std::vector<RelationMeta> m_relations;
182 
183             /**
184              * One vector each for nodes, ways, and relations containing all
185              * mappings from member ids to their relations.
186              */
187             using mm_vector_type = std::vector<MemberMeta>;
188             using mm_iterator = mm_vector_type::iterator;
189             std::array<mm_vector_type, 3> m_member_meta;
190 
191             int m_count_complete = 0;
192 
193             using callback_func_type = std::function<void(osmium::memory::Buffer&&)>;
194             callback_func_type m_callback;
195 
196             enum {
197                 initial_buffer_size = 1024UL * 1024UL
198             };
199 
find_member_meta(osmium::item_type type,osmium::object_id_type id)200             iterator_range<mm_iterator> find_member_meta(osmium::item_type type, osmium::object_id_type id) {
201                 auto& mmv = member_meta(type);
202                 return make_range(std::equal_range(mmv.begin(), mmv.end(), MemberMeta(id)));
203             }
204 
205         public:
206 
207             /**
208              * Create an Collector.
209              */
Collector()210             Collector() :
211                 m_handler_pass2(*static_cast<TCollector*>(this)),
212                 m_relations_buffer(initial_buffer_size, osmium::memory::Buffer::auto_grow::yes),
213                 m_members_buffer(initial_buffer_size, osmium::memory::Buffer::auto_grow::yes) {
214             }
215 
216         protected:
217 
member_meta(const item_type type)218             std::vector<MemberMeta>& member_meta(const item_type type) {
219                 return m_member_meta[static_cast<uint16_t>(type) - 1];
220             }
221 
callback()222             callback_func_type callback() {
223                 return m_callback;
224             }
225 
relations() const226             const std::vector<RelationMeta>& relations() const {
227                 return m_relations;
228             }
229 
230             /**
231              * This method is called from the first pass handler for every
232              * relation in the input, to check whether it should be kept.
233              *
234              * Overwrite this method in a child class to only add relations
235              * you are interested in, for instance depending on the type tag.
236              * Storing relations takes a lot of memory, so it makes sense to
237              * filter this as much as possible.
238              */
keep_relation(const osmium::Relation &) const239             bool keep_relation(const osmium::Relation& /*relation*/) const {
240                 return true;
241             }
242 
243             /**
244              * This method is called for every member of every relation that
245              * should be kept. It should decide if the member is interesting or
246              * not and return true or false to signal that. Only interesting
247              * members are later added to the relation.
248              *
249              * Overwrite this method in a child class. In the MultiPolygonCollector
250              * this is for instance used to only keep members of type way and
251              * ignore all others.
252              */
keep_member(const RelationMeta &,const osmium::RelationMember &) const253             bool keep_member(const RelationMeta& /*relation_meta*/, const osmium::RelationMember& /*member*/) const {
254                 return true;
255             }
256 
257             /**
258              * This method is called for all nodes that are not a member of
259              * any relation.
260              *
261              * Overwrite this method in a child class if you are interested
262              * in this.
263              */
node_not_in_any_relation(const osmium::Node &)264             void node_not_in_any_relation(const osmium::Node& /*node*/) {
265             }
266 
267             /**
268              * This method is called for all ways that are not a member of
269              * any relation.
270              *
271              * Overwrite this method in a child class if you are interested
272              * in this.
273              */
way_not_in_any_relation(const osmium::Way &)274             void way_not_in_any_relation(const osmium::Way& /*way*/) {
275             }
276 
277             /**
278              * This method is called for all relations that are not a member of
279              * any relation.
280              *
281              * Overwrite this method in a child class if you are interested
282              * in this.
283              */
relation_not_in_any_relation(const osmium::Relation &)284             void relation_not_in_any_relation(const osmium::Relation& /*relation*/) {
285             }
286 
287             /**
288              * This method is called from the 2nd pass handler when all objects
289              * of types we are interested in have been seen.
290              *
291              * Overwrite this method in a child class if you are interested
292              * in this.
293              *
294              * Note that even after this call members might be missing if they
295              * were not in the input file! The derived class has to handle this
296              * case.
297              */
flush()298             void flush() {
299             }
300 
get_relation(size_t offset) const301             const osmium::Relation& get_relation(size_t offset) const {
302                 assert(m_relations_buffer.committed() > offset);
303                 return m_relations_buffer.get<osmium::Relation>(offset);
304             }
305 
306             /**
307              * Get the relation from a relation_meta.
308              */
get_relation(const RelationMeta & relation_meta) const309             const osmium::Relation& get_relation(const RelationMeta& relation_meta) const {
310                 return get_relation(relation_meta.relation_offset());
311             }
312 
313             /**
314              * Get the relation from a member_meta.
315              */
get_relation(const MemberMeta & member_meta) const316             const osmium::Relation& get_relation(const MemberMeta& member_meta) const {
317                 return get_relation(m_relations[member_meta.relation_pos()]);
318             }
319 
get_member(size_t offset) const320             osmium::OSMObject& get_member(size_t offset) const {
321                 assert(m_members_buffer.committed() > offset);
322                 return m_members_buffer.get<osmium::OSMObject>(offset);
323             }
324 
325         private:
326 
327             /**
328              * Tell the Collector that you are interested in this relation
329              * and want it kept until all members have been assembled and
330              * it is handed back to you.
331              *
332              * The relation is copied and stored in a buffer inside the
333              * collector.
334              */
add_relation(const osmium::Relation & relation)335             void add_relation(const osmium::Relation& relation) {
336                 const size_t offset = m_relations_buffer.committed();
337                 m_relations_buffer.add_item(relation);
338 
339                 RelationMeta relation_meta{offset};
340 
341                 int n = 0;
342                 for (auto& member : m_relations_buffer.get<osmium::Relation>(offset).members()) {
343                     if (static_cast<TCollector*>(this)->keep_member(relation_meta, member)) {
344                         member_meta(member.type()).emplace_back(member.ref(), m_relations.size(), n);
345                         relation_meta.increment_need_members();
346                     } else {
347                         member.set_ref(0); // set member id to zero to indicate we are not interested
348                     }
349                     ++n;
350                 }
351 
352                 assert(offset == m_relations_buffer.committed());
353                 if (relation_meta.has_all_members()) {
354                     m_relations_buffer.rollback();
355                 } else {
356                     m_relations_buffer.commit();
357                     m_relations.push_back(relation_meta);
358                 }
359             }
360 
361             /**
362              * Sort the vectors with the member infos so that we can do binary
363              * search on them.
364              */
sort_member_meta()365             void sort_member_meta() {
366                 std::sort(m_member_meta[0].begin(), m_member_meta[0].end());
367                 std::sort(m_member_meta[1].begin(), m_member_meta[1].end());
368                 std::sort(m_member_meta[2].begin(), m_member_meta[2].end());
369             }
370 
count_not_removed(const iterator_range<mm_iterator> & range)371             static typename iterator_range<mm_iterator>::iterator::difference_type count_not_removed(const iterator_range<mm_iterator>& range) {
372                 return std::count_if(range.begin(), range.end(), [](const MemberMeta& mm) {
373                     return !mm.removed();
374                 });
375             }
376 
377             /**
378              * Find this object in the member vectors and add it to all
379              * relations that need it.
380              *
381              * @returns true if the member was added to at least one
382              *          relation and false otherwise
383              */
find_and_add_object(const osmium::OSMObject & object)384             bool find_and_add_object(const osmium::OSMObject& object) {
385                 auto range = find_member_meta(object.type(), object.id());
386 
387                 if (count_not_removed(range) == 0) {
388                     // nothing found
389                     return false;
390                 }
391 
392                 {
393                     members_buffer().add_item(object);
394                     const size_t member_offset = members_buffer().commit();
395 
396                     for (auto& member : range) {
397                         member.set_buffer_offset(member_offset);
398                     }
399                 }
400 
401                 for (auto& member : range) {
402                     if (member.removed()) {
403                         break;
404                     }
405                     assert(member.member_id() == object.id());
406                     assert(member.relation_pos() < m_relations.size());
407                     RelationMeta& relation_meta = m_relations[member.relation_pos()];
408                     assert(member.member_pos() < get_relation(relation_meta).members().size());
409                     relation_meta.got_one_member();
410                     if (relation_meta.has_all_members()) {
411                         const size_t relation_offset = member.relation_pos();
412                         static_cast<TCollector*>(this)->complete_relation(relation_meta);
413                         clear_member_metas(relation_meta);
414                         m_relations[relation_offset] = RelationMeta{};
415                         possibly_purge_removed_members();
416                     }
417                 }
418 
419                 return true;
420             }
421 
clear_member_metas(const RelationMeta & relation_meta)422             void clear_member_metas(const RelationMeta& relation_meta) {
423                 const osmium::Relation& relation = get_relation(relation_meta);
424                 for (const auto& member : relation.members()) {
425                     if (member.ref() != 0) {
426                         const auto range = find_member_meta(member.type(), member.ref());
427                         assert(!range.empty());
428 
429                         // if this is the last time this object was needed
430                         // then mark it as removed
431                         if (count_not_removed(range) == 1) {
432                             get_member(range.begin()->buffer_offset()).set_removed(true);
433                         }
434 
435                         for (auto& member_meta : range) {
436                             if (!member_meta.removed() && relation.id() == get_relation(member_meta).id()) {
437                                 member_meta.remove();
438                                 break;
439                             }
440                         }
441                     }
442                 }
443             }
444 
445         public:
446 
used_memory() const447             uint64_t used_memory() const {
448                 const uint64_t nmembers = m_member_meta[0].capacity() + m_member_meta[1].capacity() + m_member_meta[2].capacity();
449                 const uint64_t members = nmembers * sizeof(MemberMeta);
450                 const uint64_t relations_size = m_relations.capacity() * sizeof(RelationMeta);
451                 const uint64_t relations_buffer_capacity = m_relations_buffer.capacity();
452                 const uint64_t members_buffer_capacity = m_members_buffer.capacity();
453 
454                 std::cerr << "  nR  = m_relations.capacity() ........... = " << std::setw(12) << m_relations.capacity() << "\n";
455                 std::cerr << "  nMN = m_member_meta[NODE].capacity() ... = " << std::setw(12) << m_member_meta[0].capacity() << "\n";
456                 std::cerr << "  nMW = m_member_meta[WAY].capacity() .... = " << std::setw(12) << m_member_meta[1].capacity() << "\n";
457                 std::cerr << "  nMR = m_member_meta[RELATION].capacity() = " << std::setw(12) << m_member_meta[2].capacity() << "\n";
458                 std::cerr << "  nM  = m_member_meta[*].capacity() ...... = " << std::setw(12) << nmembers << "\n";
459 
460                 std::cerr << "  sRM = sizeof(RelationMeta) ............. = " << std::setw(12) << sizeof(RelationMeta) << "\n";
461                 std::cerr << "  sMM = sizeof(MemberMeta) ............... = " << std::setw(12) << sizeof(MemberMeta) << "\n\n";
462 
463                 std::cerr << "  nR * sRM ............................... = " << std::setw(12) << relations_size << "\n";
464                 std::cerr << "  nM * sMM ............................... = " << std::setw(12) << members << "\n";
465                 std::cerr << "  relations_buffer_capacity .............. = " << std::setw(12) << relations_buffer_capacity << "\n";
466                 std::cerr << "  members_buffer_capacity ................ = " << std::setw(12) << members_buffer_capacity << "\n";
467 
468                 const uint64_t total = relations_size + members + relations_buffer_capacity + members_buffer_capacity;
469 
470                 std::cerr << "  total .................................. = " << std::setw(12) << total << "\n";
471                 std::cerr << "  =======================================================\n";
472 
473                 return relations_buffer_capacity + members_buffer_capacity + relations_size + members;
474             }
475 
476             /**
477              * Return reference to second pass handler.
478              */
handler(const callback_func_type & callback=nullptr)479             HandlerPass2& handler(const callback_func_type& callback = nullptr) {
480                 m_callback = callback;
481                 return m_handler_pass2;
482             }
483 
members_buffer()484             osmium::memory::Buffer& members_buffer() {
485                 return m_members_buffer;
486             }
487 
488             /**
489              * Is the given member available in the members buffer?
490              *
491              * If you also need the offset of the object, use
492              * get_availability_and_offset() instead, it is more efficient
493              * that way.
494              *
495              * @param type Item type
496              * @param id Object Id
497              * @returns True if the object is available, false otherwise.
498              */
is_available(osmium::item_type type,osmium::object_id_type id)499             bool is_available(osmium::item_type type, osmium::object_id_type id) {
500                 const auto range = find_member_meta(type, id);
501                 assert(!range.empty());
502                 return range.begin()->is_available();
503             }
504 
505             /**
506              * Get offset of a member in the members buffer.
507              *
508              * @pre The member must be available. If you are not sure, call
509              *      get_availability_and_offset() instead.
510              * @param type Item type
511              * @param id Object Id
512              * @returns The offset of the object in the members buffer.
513              */
get_offset(osmium::item_type type,osmium::object_id_type id)514             size_t get_offset(osmium::item_type type, osmium::object_id_type id) {
515                 const auto range = find_member_meta(type, id);
516                 assert(!range.empty());
517                 assert(range.begin()->is_available());
518                 return range.begin()->buffer_offset();
519             }
520 
521             /**
522              * Checks whether a member is available in the members buffer
523              * and returns its offset.
524              *
525              * If the member is not available, the boolean returned as the
526              * first element in the pair is false. In that case the offset
527              * in the second element is undefined.
528              *
529              * If the member is available, the boolean returned as the first
530              * element in the pair is true and the second element of the
531              * pair contains the offset into the members buffer.
532              *
533              * @param type Item type
534              * @param id Object Id
535              * @returns Pair of bool (showing availability) and the offset.
536              */
get_availability_and_offset(osmium::item_type type,osmium::object_id_type id)537             std::pair<bool, size_t> get_availability_and_offset(osmium::item_type type, osmium::object_id_type id) {
538                 const auto range = find_member_meta(type, id);
539                 assert(!range.empty());
540                 if (range.begin()->is_available()) {
541                     return std::make_pair(true, range.begin()->buffer_offset());
542                 }
543                 return std::make_pair(false, 0);
544             }
545 
546             template <typename TIter>
read_relations(TIter begin,TIter end)547             void read_relations(TIter begin, TIter end) {
548                 HandlerPass1 handler_pass1{*static_cast<TCollector*>(this)};
549                 osmium::apply(begin, end, handler_pass1);
550                 sort_member_meta();
551             }
552 
553             template <typename TSource>
read_relations(TSource & source)554             void read_relations(TSource& source) {
555                 using std::begin;
556                 using std::end;
557                 read_relations(begin(source), end(source));
558                 source.close();
559             }
560 
moving_in_buffer(size_t old_offset,size_t new_offset)561             void moving_in_buffer(size_t old_offset, size_t new_offset) {
562                 const osmium::OSMObject& object = m_members_buffer.get<osmium::OSMObject>(old_offset);
563                 auto range = find_member_meta(object.type(), object.id());
564                 for (auto& member : range) {
565                     assert(member.buffer_offset() == old_offset);
566                     member.set_buffer_offset(new_offset);
567                 }
568             }
569 
570             /**
571              * Decide whether to purge removed members and then do it.
572              *
573              * Currently the purging is done every 10000 calls.
574              * This could probably be improved upon.
575              */
possibly_purge_removed_members()576             void possibly_purge_removed_members() {
577                 ++m_count_complete;
578                 if (m_count_complete > 10000) { // XXX
579 //                    const size_t size_before = m_members_buffer.committed();
580                     m_members_buffer.purge_removed(this);
581 /*
582                     const size_t size_after = m_members_buffer.committed();
583                     double percent = static_cast<double>(size_before - size_after);
584                     percent /= size_before;
585                     percent *= 100;
586                     std::cerr << "PURGE (size before=" << size_before << " after=" << size_after << " purged=" << (size_before - size_after) << " / " << static_cast<int>(percent) << "%)\n";
587 */
588                     m_count_complete = 0;
589                 }
590             }
591 
592             /**
593              * Get a vector with pointers to all Relations that could not
594              * be completed, because members were missing in the input
595              * data.
596              *
597              * Note that these pointers point into memory allocated and
598              * owned by the Collector object.
599              */
get_incomplete_relations() const600             std::vector<const osmium::Relation*> get_incomplete_relations() const {
601                 std::vector<const osmium::Relation*> incomplete_relations;
602                 for (const auto& relation_meta : m_relations) {
603                     if (!relation_meta.has_all_members()) {
604                         incomplete_relations.push_back(&get_relation(relation_meta));
605                     }
606                 }
607                 return incomplete_relations;
608             }
609 
610         }; // class Collector
611 
612     } // namespace relations
613 
614 } // namespace osmium
615 
616 #endif // OSMIUM_RELATIONS_COLLECTOR_HPP
617