1 /*
2 * Copyright (C) 2020 Veloman Yunkan
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11 * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12 * NON-INFRINGEMENT. See the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 */
19
20 #ifndef ZIM_DIRENT_LOOKUP_H
21 #define ZIM_DIRENT_LOOKUP_H
22
23 #include "zim_types.h"
24 #include "debug.h"
25 #include "narrowdown.h"
26
27 #include <algorithm>
28 #include <map>
29 #include <mutex>
30 #include <vector>
31
32 namespace zim
33 {
34
35 template<class Impl>
36 class DirentLookup
37 {
38 public: // types
39 typedef std::pair<bool, article_index_t> Result;
40
41 public: // functions
42 DirentLookup(Impl* _impl, article_index_type cacheEntryCount);
43
44 article_index_t getNamespaceRangeBegin(char ns) const;
45 article_index_t getNamespaceRangeEnd(char ns) const;
46
47 Result find(char ns, const std::string& url);
48
49 private: // functions
50 std::string getDirentKey(article_index_type i) const;
51
52 private: // types
53 typedef std::map<char, article_index_t> NamespaceBoundaryCache;
54
55 private: // data
56 Impl* impl = nullptr;
57
58 mutable NamespaceBoundaryCache namespaceBoundaryCache;
59 mutable std::mutex cacheAccessMutex;
60
61 article_index_type articleCount = 0;
62 NarrowDown lookupGrid;
63 };
64
65 template<class Impl>
66 std::string
getDirentKey(article_index_type i)67 DirentLookup<Impl>::getDirentKey(article_index_type i) const
68 {
69 const auto d = impl->getDirent(article_index_t(i));
70 return d->getNamespace() + d->getUrl();
71 }
72
73 template<class Impl>
DirentLookup(Impl * _impl,article_index_type cacheEntryCount)74 DirentLookup<Impl>::DirentLookup(Impl* _impl, article_index_type cacheEntryCount)
75 {
76 ASSERT(impl == nullptr, ==, true);
77 impl = _impl;
78 articleCount = article_index_type(impl->getCountArticles());
79 if ( articleCount )
80 {
81 const article_index_type step = std::max(1u, articleCount/cacheEntryCount);
82 for ( article_index_type i = 0; i < articleCount-1; i += step )
83 {
84 lookupGrid.add(getDirentKey(i), i, getDirentKey(i+1));
85 }
86 lookupGrid.close(getDirentKey(articleCount - 1), articleCount - 1);
87 }
88 }
89
90 template<typename IMPL>
getNamespaceBeginOffset(IMPL & impl,char ch)91 article_index_t getNamespaceBeginOffset(IMPL& impl, char ch)
92 {
93 ASSERT(ch, >=, 32);
94 ASSERT(ch, <=, 127);
95
96 article_index_type lower = 0;
97 article_index_type upper = article_index_type(impl.getCountArticles());
98 auto d = impl.getDirent(article_index_t(0));
99 while (upper - lower > 1)
100 {
101 article_index_type m = lower + (upper - lower) / 2;
102 auto d = impl.getDirent(article_index_t(m));
103 if (d->getNamespace() >= ch)
104 upper = m;
105 else
106 lower = m;
107 }
108
109 article_index_t ret = article_index_t(d->getNamespace() < ch ? upper : lower);
110 return ret;
111 }
112
113 template<typename IMPL>
getNamespaceEndOffset(IMPL & impl,char ch)114 article_index_t getNamespaceEndOffset(IMPL& impl, char ch)
115 {
116 ASSERT(ch, >=, 32);
117 ASSERT(ch, <, 127);
118 return getNamespaceBeginOffset(impl, ch+1);
119 }
120
121
122
123 template<class Impl>
124 article_index_t
getNamespaceRangeBegin(char ch)125 DirentLookup<Impl>::getNamespaceRangeBegin(char ch) const
126 {
127 ASSERT(ch, >=, 32);
128 ASSERT(ch, <=, 127);
129
130 {
131 std::lock_guard<std::mutex> lock(cacheAccessMutex);
132 const auto it = namespaceBoundaryCache.find(ch);
133 if (it != namespaceBoundaryCache.end())
134 return it->second;
135 }
136
137 auto ret = getNamespaceBeginOffset(*impl, ch);
138
139 std::lock_guard<std::mutex> lock(cacheAccessMutex);
140 namespaceBoundaryCache[ch] = ret;
141 return ret;
142 }
143
144 template<class Impl>
145 article_index_t
getNamespaceRangeEnd(char ns)146 DirentLookup<Impl>::getNamespaceRangeEnd(char ns) const
147 {
148 return getNamespaceRangeBegin(ns+1);
149 }
150
151 template<typename Impl>
152 typename DirentLookup<Impl>::Result
find(char ns,const std::string & url)153 DirentLookup<Impl>::find(char ns, const std::string& url)
154 {
155 const auto r = lookupGrid.getRange(ns + url);
156 article_index_type l(r.begin);
157 article_index_type u(r.end);
158
159 if (l == u)
160 return {false, article_index_t(l)};
161
162 while (true)
163 {
164 article_index_type p = l + (u - l) / 2;
165 const auto d = impl->getDirent(article_index_t(p));
166
167 const int c = ns < d->getNamespace() ? -1
168 : ns > d->getNamespace() ? 1
169 : url.compare(d->getUrl());
170
171 if (c < 0)
172 u = p;
173 else if (c > 0)
174 {
175 if ( l == p )
176 return {false, article_index_t(u)};
177 l = p;
178 }
179 else
180 return {true, article_index_t(p)};
181 }
182 }
183
184 } // namespace zim
185
186 #endif // ZIM_DIRENT_LOOKUP_H
187