1 /*
2 ** Copyright (C) 2019 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
3 **
4 ** This program is free software; you can redistribute it and/or modify it
5 ** under the terms of the GNU General Public License as published by the
6 ** Free Software Foundation; either version 3, or (at your option) any
7 ** later version.
8 **
9 ** This program is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ** GNU General Public License for more details.
13 **
14 ** You should have received a copy of the GNU General Public License
15 ** along with this program; if not, write to the Free Software Foundation,
16 ** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 **
18 */
19
20 #include "mu-contacts.hh"
21
22 #include <mutex>
23 #include <unordered_map>
24 #include <set>
25 #include <sstream>
26 #include <functional>
27 #include <algorithm>
28 #include <regex>
29 #include <ctime>
30
31 #include <utils/mu-utils.hh>
32 #include <glib.h>
33
34 using namespace Mu;
35
ContactInfo(const std::string & _full_address,const std::string & _email,const std::string & _name,bool _personal,time_t _last_seen,size_t _freq)36 ContactInfo::ContactInfo (const std::string& _full_address,
37 const std::string& _email,
38 const std::string& _name,
39 bool _personal,
40 time_t _last_seen,
41 size_t _freq):
42 full_address{_full_address},
43 email{_email},
44 name{_name},
45 personal{_personal},
46 last_seen{_last_seen},
47 freq{_freq},
48 tstamp{g_get_monotonic_time()} {}
49
50 struct EmailHash {
operator ()EmailHash51 std::size_t operator()(const std::string& email) const {
52 std::size_t djb = 5381; // djb hash
53 for (const auto c : email)
54 djb = ((djb << 5) + djb) + g_ascii_tolower(c);
55 return djb;
56 }
57 };
58
59 struct EmailEqual {
operator ()EmailEqual60 bool operator()(const std::string& email1, const std::string& email2) const {
61 return g_ascii_strcasecmp(email1.c_str(), email2.c_str()) == 0;
62 }
63 };
64
65 struct ContactInfoHash {
operator ()ContactInfoHash66 std::size_t operator()(const ContactInfo& ci) const {
67 std::size_t djb = 5381; // djb hash
68 for (const auto c : ci.email)
69 djb = ((djb << 5) + djb) + g_ascii_tolower(c);
70 return djb;
71 }
72 };
73
74 struct ContactInfoEqual {
operator ()ContactInfoEqual75 bool operator()(const Mu::ContactInfo& ci1, const Mu::ContactInfo& ci2) const {
76 return g_ascii_strcasecmp(ci1.email.c_str(), ci2.email.c_str()) == 0;
77 }
78 };
79
80 constexpr auto RecentOffset{15 * 24 * 3600};
81 struct ContactInfoLessThan {
82
ContactInfoLessThanContactInfoLessThan83 ContactInfoLessThan(): recently_{::time({}) - RecentOffset} {}
84
operator ()ContactInfoLessThan85 bool operator()(const Mu::ContactInfo& ci1, const Mu::ContactInfo& ci2) const {
86
87 if (ci1.personal != ci2.personal)
88 return ci1.personal; // personal comes first
89
90 if ((ci1.last_seen > recently_) != (ci2.last_seen > recently_))
91 return ci1.last_seen > ci2.last_seen;
92
93 if (ci1.freq != ci2.freq) // more frequent comes first
94 return ci1.freq > ci2.freq;
95
96 return g_ascii_strcasecmp(ci1.email.c_str(), ci2.email.c_str()) < 0;
97 }
98 // only sort recently seen contacts by recency; approx 15 days.
99 // this changes during the lifetime, but that's all fine.
100 const time_t recently_;
101 };
102
103 using ContactUMap = std::unordered_map<const std::string, ContactInfo, EmailHash, EmailEqual>;
104 //using ContactUSet = std::unordered_set<ContactInfo, ContactInfoHash, ContactInfoEqual>;
105 using ContactSet = std::set<std::reference_wrapper<const ContactInfo>, ContactInfoLessThan>;
106
107 struct Contacts::Private {
PrivateContacts::Private108 Private(const std::string& serialized,
109 const StringVec& personal):
110 contacts_{deserialize(serialized)} {
111 make_personal(personal);
112 }
113
114 void make_personal(const StringVec& personal);
115 ContactUMap deserialize(const std::string&) const;
116 std::string serialize() const;
117
118 ContactUMap contacts_;
119 std::mutex mtx_;
120
121 StringVec personal_plain_;
122 std::vector<std::regex> personal_rx_;
123 };
124
// field separator used in the serialized form of the contacts; the byte
// 0xff is invalid in UTF-8, so it cannot appear in valid UTF-8 field data.
constexpr const char* Separator{"\xff"};
126
127 void
make_personal(const StringVec & personal)128 Contacts::Private::make_personal (const StringVec& personal)
129 {
130 for (auto&& p: personal) {
131
132 if (p.empty())
133 continue; // invalid
134
135 if (p.size() < 2 || p.at(0) != '/' || p.at(p.length() - 1) != '/')
136 personal_plain_.emplace_back(p); // normal address
137 else {
138 // a regex pattern.
139 try {
140 const auto rxstr{p.substr(1, p.length()-2)};
141 personal_rx_.emplace_back(
142 std::regex(rxstr,
143 std::regex::basic |
144 std::regex::optimize |
145 std::regex::icase));
146
147 } catch (const std::regex_error& rex) {
148 g_warning ("invalid personal address regexp '%s': %s",
149 p.c_str(), rex.what());
150 }
151 }
152 }
153 }
154
155 ContactUMap
deserialize(const std::string & serialized) const156 Contacts::Private::deserialize(const std::string& serialized) const
157 {
158 ContactUMap contacts;
159 std::stringstream ss{serialized, std::ios_base::in};
160 std::string line;
161
162 while (getline (ss, line)) {
163
164 const auto parts = Mu::split (line, Separator);
165 if (G_UNLIKELY(parts.size() != 6)) {
166 g_warning ("error: '%s'", line.c_str());
167 continue;
168 }
169
170 ContactInfo ci(std::move(parts[0]), // full address
171 parts[1], // email
172 std::move(parts[2]), // name
173 parts[3][0] == '1' ? true : false, // personal
174 (time_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // last_seen
175 (std::size_t)g_ascii_strtoll(parts[5].c_str(), NULL, 10)); // freq
176
177 contacts.emplace(std::move(parts[1]), std::move(ci));
178 }
179
180 return contacts;
181 }
182
183
Contacts(const std::string & serialized,const StringVec & personal)184 Contacts::Contacts (const std::string& serialized, const StringVec& personal) :
185 priv_{std::make_unique<Private>(serialized, personal)}
186 {}
187
188 Contacts::~Contacts() = default;
189
190 std::string
serialize() const191 Contacts::serialize() const
192 {
193 std::lock_guard<std::mutex> l_{priv_->mtx_};
194 std::string s;
195
196 for (auto& item: priv_->contacts_) {
197 const auto& ci{item.second};
198 s += Mu::format("%s%s"
199 "%s%s"
200 "%s%s"
201 "%d%s"
202 "%" G_GINT64_FORMAT "%s"
203 "%" G_GINT64_FORMAT "\n",
204 ci.full_address.c_str(), Separator,
205 ci.email.c_str(), Separator,
206 ci.name.c_str(), Separator,
207 ci.personal ? 1 : 0, Separator,
208 (gint64)ci.last_seen, Separator,
209 (gint64)ci.freq);
210 }
211
212 return s;
213 }
214
215 const ContactInfo
add(ContactInfo && ci)216 Contacts::add (ContactInfo&& ci)
217 {
218 std::lock_guard<std::mutex> l_{priv_->mtx_};
219
220 auto it = priv_->contacts_.find(ci.email);
221
222 if (it == priv_->contacts_.end()) { // completely new contact
223
224 ci.name = Mu::remove_ctrl(ci.name);
225 ci.full_address = remove_ctrl(ci.full_address);
226
227 auto email{ci.email};
228 return priv_->contacts_.emplace(ContactUMap::value_type(email, std::move(ci))).first->second;
229
230 } else { // existing contact.
231 auto& ci_existing{it->second};
232 ++ci_existing.freq;
233
234 if (ci.last_seen > ci_existing.last_seen) { // update.
235
236 ci_existing.email = std::move(ci.email);
237 ci_existing.name = Mu::remove_ctrl(ci.name);
238 ci_existing.full_address = Mu::remove_ctrl(ci.full_address);
239
240 ci_existing.tstamp = g_get_monotonic_time();
241 ci_existing.last_seen = ci.last_seen;
242 }
243
244 return ci;
245 }
246 }
247
248
249 const ContactInfo*
_find(const std::string & email) const250 Contacts::_find (const std::string& email) const
251 {
252 std::lock_guard<std::mutex> l_{priv_->mtx_};
253
254 const auto it = priv_->contacts_.find(email);
255 if (it == priv_->contacts_.end())
256 return {};
257 else
258 return &it->second;
259 }
260
261
262 void
clear()263 Contacts::clear()
264 {
265 std::lock_guard<std::mutex> l_{priv_->mtx_};
266
267 priv_->contacts_.clear();
268 }
269
270
271 std::size_t
size() const272 Contacts::size() const
273 {
274 std::lock_guard<std::mutex> l_{priv_->mtx_};
275
276 return priv_->contacts_.size();
277 }
278
279
280 void
for_each(const EachContactFunc & each_contact) const281 Contacts::for_each(const EachContactFunc& each_contact) const
282 {
283 std::lock_guard<std::mutex> l_{priv_->mtx_};
284
285 if (!each_contact)
286 return; // nothing to do
287
288 // first sort them for 'rank'
289 ContactSet sorted;
290 for (const auto& item: priv_->contacts_)
291 sorted.emplace(item.second);
292
293 for (const auto& ci: sorted)
294 each_contact (ci);
295 }
296
297 bool
is_personal(const std::string & addr) const298 Contacts::is_personal(const std::string& addr) const
299 {
300 for (auto&& p: priv_->personal_plain_)
301 if (g_ascii_strcasecmp(addr.c_str(), p.c_str()) == 0)
302 return true;
303
304 for (auto&& rx: priv_->personal_rx_) {
305 std::smatch m; // perhaps cache addr in personal_plain_?
306 if (std::regex_match(addr, m, rx))
307 return true;
308 }
309
310 return false;
311 }
312
313
314 #ifdef BUILD_TESTS
315 /*
316 * Tests.
317 *
318 */
319
320 #include "test-mu-common.hh"
321
322 static void
test_mu_contacts_01()323 test_mu_contacts_01()
324 {
325 Mu::Contacts contacts ("");
326
327 g_assert_true (contacts.empty());
328 g_assert_cmpuint (contacts.size(), ==, 0);
329
330 contacts.add(Mu::ContactInfo ("Foo <foo.bar@example.com>",
331 "foo.bar@example.com", "Foo", false, 12345));
332 g_assert_false (contacts.empty());
333 g_assert_cmpuint (contacts.size(), ==, 1);
334
335 contacts.add(Mu::ContactInfo ("Cuux <cuux.fnorb@example.com>",
336 "cuux@example.com", "Cuux", false, 54321));
337
338 g_assert_cmpuint (contacts.size(), ==, 2);
339
340 contacts.add(Mu::ContactInfo ("foo.bar@example.com",
341 "foo.bar@example.com", "Foo", false, 77777));
342 g_assert_cmpuint (contacts.size(), ==, 2);
343
344 contacts.add(Mu::ContactInfo ("Foo.Bar@Example.Com",
345 "Foo.Bar@Example.Com", "Foo", false, 88888));
346 g_assert_cmpuint (contacts.size(), ==, 2);
347 // note: replaces first.
348
349 {
350 const auto info = contacts._find("bla@example.com");
351 g_assert_false (info);
352 }
353
354 {
355 const auto info = contacts._find("foo.BAR@example.com");
356 g_assert_true (info);
357
358 g_assert_cmpstr(info->email.c_str(), ==, "Foo.Bar@Example.Com");
359 }
360
361 contacts.clear();
362 g_assert_true (contacts.empty());
363 g_assert_cmpuint (contacts.size(), ==, 0);
364 }
365
366 static void
test_mu_contacts_02()367 test_mu_contacts_02()
368 {
369 Mu::StringVec personal = {
370 "foo@example.com",
371 "bar@cuux.org",
372 "/bar-.*@fnorb.f./"
373 };
374 Mu::Contacts contacts{"", personal};
375
376 g_assert_true (contacts.is_personal("foo@example.com"));
377 g_assert_true (contacts.is_personal("Bar@CuuX.orG"));
378 g_assert_true (contacts.is_personal("bar-123abc@fnorb.fi"));
379 g_assert_true (contacts.is_personal("bar-zzz@fnorb.fr"));
380
381 g_assert_false (contacts.is_personal("foo@bar.com"));
382 g_assert_false (contacts.is_personal("BÂr@CuuX.orG"));
383 g_assert_false (contacts.is_personal("bar@fnorb.fi"));
384 g_assert_false (contacts.is_personal("bar-zzz@fnorb.xr"));
385 }
386
387
388
389 int
main(int argc,char * argv[])390 main (int argc, char *argv[])
391 {
392 g_test_init (&argc, &argv, NULL);
393
394 g_test_add_func ("/mu-contacts/01", test_mu_contacts_01);
395 g_test_add_func ("/mu-contacts/02", test_mu_contacts_02);
396
397 g_log_set_handler (NULL,
398 (GLogLevelFlags)
399 (G_LOG_LEVEL_MASK | G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION),
400 (GLogFunc)black_hole, NULL);
401
402 return g_test_run ();
403 }
404 #endif /*BUILD_TESTS*/
405