1 /*
2  * Copyright (C) 2020 Veloman Yunkan
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11  * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12  * NON-INFRINGEMENT.  See the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
17  *
18  */
19 
20 #ifndef ZIM_DIRENT_LOOKUP_H
21 #define ZIM_DIRENT_LOOKUP_H
22 
23 #include "zim_types.h"
24 #include "debug.h"
25 #include "narrowdown.h"
26 
27 #include <algorithm>
28 #include <map>
29 #include <mutex>
30 #include <vector>
31 
32 namespace zim
33 {
34 
35 template<class Impl>
36 class DirentLookup
37 {
38 public: // types
39   typedef std::pair<bool, article_index_t> Result;
40 
41 public: // functions
42   DirentLookup(Impl* _impl, article_index_type cacheEntryCount);
43 
44   article_index_t getNamespaceRangeBegin(char ns) const;
45   article_index_t getNamespaceRangeEnd(char ns) const;
46 
47   Result find(char ns, const std::string& url);
48 
49 private: // functions
50   std::string getDirentKey(article_index_type i) const;
51 
52 private: // types
53   typedef std::map<char, article_index_t> NamespaceBoundaryCache;
54 
55 private: // data
56   Impl* impl = nullptr;
57 
58   mutable NamespaceBoundaryCache namespaceBoundaryCache;
59   mutable std::mutex cacheAccessMutex;
60 
61   article_index_type articleCount = 0;
62   NarrowDown lookupGrid;
63 };
64 
65 template<class Impl>
66 std::string
getDirentKey(article_index_type i)67 DirentLookup<Impl>::getDirentKey(article_index_type i) const
68 {
69   const auto d = impl->getDirent(article_index_t(i));
70   return d->getNamespace() + d->getUrl();
71 }
72 
73 template<class Impl>
DirentLookup(Impl * _impl,article_index_type cacheEntryCount)74 DirentLookup<Impl>::DirentLookup(Impl* _impl, article_index_type cacheEntryCount)
75 {
76   ASSERT(impl == nullptr, ==, true);
77   impl = _impl;
78   articleCount = article_index_type(impl->getCountArticles());
79   if ( articleCount )
80   {
81     const article_index_type step = std::max(1u, articleCount/cacheEntryCount);
82     for ( article_index_type i = 0; i < articleCount-1; i += step )
83     {
84         lookupGrid.add(getDirentKey(i), i, getDirentKey(i+1));
85     }
86     lookupGrid.close(getDirentKey(articleCount - 1), articleCount - 1);
87   }
88 }
89 
90 template<typename IMPL>
getNamespaceBeginOffset(IMPL & impl,char ch)91 article_index_t getNamespaceBeginOffset(IMPL& impl, char ch)
92 {
93   ASSERT(ch, >=, 32);
94   ASSERT(ch, <=, 127);
95 
96   article_index_type lower = 0;
97   article_index_type upper = article_index_type(impl.getCountArticles());
98   auto d = impl.getDirent(article_index_t(0));
99   while (upper - lower > 1)
100   {
101     article_index_type m = lower + (upper - lower) / 2;
102     auto d = impl.getDirent(article_index_t(m));
103     if (d->getNamespace() >= ch)
104       upper = m;
105     else
106       lower = m;
107   }
108 
109   article_index_t ret = article_index_t(d->getNamespace() < ch ? upper : lower);
110   return ret;
111 }
112 
113 template<typename IMPL>
getNamespaceEndOffset(IMPL & impl,char ch)114 article_index_t getNamespaceEndOffset(IMPL& impl, char ch)
115 {
116   ASSERT(ch, >=, 32);
117   ASSERT(ch, <, 127);
118   return getNamespaceBeginOffset(impl, ch+1);
119 }
120 
121 
122 
123 template<class Impl>
124 article_index_t
getNamespaceRangeBegin(char ch)125 DirentLookup<Impl>::getNamespaceRangeBegin(char ch) const
126 {
127   ASSERT(ch, >=, 32);
128   ASSERT(ch, <=, 127);
129 
130   {
131     std::lock_guard<std::mutex> lock(cacheAccessMutex);
132     const auto it = namespaceBoundaryCache.find(ch);
133     if (it != namespaceBoundaryCache.end())
134       return it->second;
135   }
136 
137   auto ret = getNamespaceBeginOffset(*impl, ch);
138 
139   std::lock_guard<std::mutex> lock(cacheAccessMutex);
140   namespaceBoundaryCache[ch] = ret;
141   return ret;
142 }
143 
144 template<class Impl>
145 article_index_t
getNamespaceRangeEnd(char ns)146 DirentLookup<Impl>::getNamespaceRangeEnd(char ns) const
147 {
148   return getNamespaceRangeBegin(ns+1);
149 }
150 
151 template<typename Impl>
152 typename DirentLookup<Impl>::Result
find(char ns,const std::string & url)153 DirentLookup<Impl>::find(char ns, const std::string& url)
154 {
155   const auto r = lookupGrid.getRange(ns + url);
156   article_index_type l(r.begin);
157   article_index_type u(r.end);
158 
159   if (l == u)
160     return {false, article_index_t(l)};
161 
162   while (true)
163   {
164     article_index_type p = l + (u - l) / 2;
165     const auto d = impl->getDirent(article_index_t(p));
166 
167     const int c = ns < d->getNamespace() ? -1
168                 : ns > d->getNamespace() ? 1
169                 : url.compare(d->getUrl());
170 
171     if (c < 0)
172       u = p;
173     else if (c > 0)
174     {
175       if ( l == p )
176         return {false, article_index_t(u)};
177       l = p;
178     }
179     else
180       return {true, article_index_t(p)};
181   }
182 }
183 
184 } // namespace zim
185 
186 #endif // ZIM_DIRENT_LOOKUP_H
187