1 /*
2 ** Copyright (C) 2019 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
3 **
4 ** This program is free software; you can redistribute it and/or modify it
5 ** under the terms of the GNU General Public License as published by the
6 ** Free Software Foundation; either version 3, or (at your option) any
7 ** later version.
8 **
9 ** This program is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 ** GNU General Public License for more details.
13 **
14 ** You should have received a copy of the GNU General Public License
15 ** along with this program; if not, write to the Free Software Foundation,
16 ** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 **
18 */
19 
20 #include "mu-contacts.hh"
21 
22 #include <mutex>
23 #include <unordered_map>
24 #include <set>
25 #include <sstream>
26 #include <functional>
27 #include <algorithm>
28 #include <regex>
29 #include <ctime>
30 
31 #include <utils/mu-utils.hh>
32 #include <glib.h>
33 
34 using namespace Mu;
35 
ContactInfo(const std::string & _full_address,const std::string & _email,const std::string & _name,bool _personal,time_t _last_seen,size_t _freq)36 ContactInfo::ContactInfo (const std::string& _full_address,
37                           const std::string& _email,
38                           const std::string& _name,
39                           bool _personal,
40                           time_t _last_seen,
41                           size_t _freq):
42         full_address{_full_address},
43         email{_email},
44         name{_name},
45         personal{_personal},
46         last_seen{_last_seen},
47         freq{_freq},
48         tstamp{g_get_monotonic_time()} {}
49 
50 struct EmailHash {
operator ()EmailHash51         std::size_t operator()(const std::string& email) const {
52                 std::size_t djb = 5381; // djb hash
53                 for (const auto c : email)
54                         djb = ((djb << 5) + djb) + g_ascii_tolower(c);
55                 return djb;
56         }
57 };
58 
59 struct EmailEqual {
operator ()EmailEqual60         bool operator()(const std::string& email1, const std::string& email2) const {
61                 return g_ascii_strcasecmp(email1.c_str(), email2.c_str()) == 0;
62         }
63 };
64 
65 struct ContactInfoHash {
operator ()ContactInfoHash66         std::size_t operator()(const ContactInfo& ci) const {
67                 std::size_t djb = 5381; // djb hash
68                 for (const auto c : ci.email)
69                         djb = ((djb << 5) + djb) + g_ascii_tolower(c);
70                 return djb;
71         }
72 };
73 
74 struct ContactInfoEqual {
operator ()ContactInfoEqual75         bool operator()(const Mu::ContactInfo& ci1, const Mu::ContactInfo& ci2) const {
76                 return g_ascii_strcasecmp(ci1.email.c_str(), ci2.email.c_str()) == 0;
77         }
78 };
79 
80 constexpr auto RecentOffset{15 * 24 * 3600};
81 struct ContactInfoLessThan {
82 
ContactInfoLessThanContactInfoLessThan83         ContactInfoLessThan(): recently_{::time({}) - RecentOffset} {}
84 
operator ()ContactInfoLessThan85         bool operator()(const Mu::ContactInfo& ci1, const Mu::ContactInfo& ci2) const {
86 
87                 if (ci1.personal != ci2.personal)
88                         return ci1.personal; // personal comes first
89 
90                 if ((ci1.last_seen > recently_) != (ci2.last_seen > recently_))
91                         return ci1.last_seen > ci2.last_seen;
92 
93                 if (ci1.freq != ci2.freq) // more frequent comes first
94                         return ci1.freq > ci2.freq;
95 
96                 return g_ascii_strcasecmp(ci1.email.c_str(), ci2.email.c_str()) < 0;
97         }
98         // only sort recently seen contacts by recency; approx 15 days.
99         // this changes during the lifetime, but that's all fine.
100         const time_t recently_;
101 };
102 
// Map from e-mail address to ContactInfo; keys are hashed and compared
// ignoring ASCII case (see EmailHash / EmailEqual).
103 using ContactUMap = std::unordered_map<const std::string, ContactInfo, EmailHash, EmailEqual>;
104 //using ContactUSet = std::unordered_set<ContactInfo, ContactInfoHash, ContactInfoEqual>;
// Ordered set of (non-owning) references to contacts, ranked by
// ContactInfoLessThan; the referenced ContactInfo objects must outlive the set.
105 using ContactSet  = std::set<std::reference_wrapper<const ContactInfo>, ContactInfoLessThan>;
106 
107 struct Contacts::Private {
PrivateContacts::Private108         Private(const std::string& serialized,
109                 const StringVec& personal):
110                 contacts_{deserialize(serialized)} {
111                 make_personal(personal);
112         }
113 
114         void make_personal(const StringVec& personal);
115         ContactUMap deserialize(const std::string&) const;
116         std::string serialize() const;
117 
118         ContactUMap contacts_;
119         std::mutex  mtx_;
120 
121         StringVec               personal_plain_;
122         std::vector<std::regex> personal_rx_;
123 };
124 
// Field separator used in serialized contact lines. The byte 0xff cannot
// occur in valid UTF-8 text, so it does not clash with valid UTF-8 field data.
125 constexpr auto Separator = "\xff"; // Invalid in UTF-8
126 
127 void
make_personal(const StringVec & personal)128 Contacts::Private::make_personal (const StringVec& personal)
129 {
130         for (auto&& p: personal)  {
131 
132                 if (p.empty())
133                         continue; // invalid
134 
135                 if (p.size() < 2 || p.at(0) != '/' || p.at(p.length() - 1) != '/')
136                         personal_plain_.emplace_back(p); // normal address
137                 else {
138                         // a regex pattern.
139                         try {
140                                 const auto rxstr{p.substr(1, p.length()-2)};
141                                 personal_rx_.emplace_back(
142                                         std::regex(rxstr,
143                                                    std::regex::basic |
144                                                    std::regex::optimize |
145                                                    std::regex::icase));
146 
147                         } catch (const std::regex_error& rex) {
148                                 g_warning ("invalid personal address regexp '%s': %s",
149                                            p.c_str(), rex.what());
150                         }
151                 }
152         }
153 }
154 
155 ContactUMap
deserialize(const std::string & serialized) const156 Contacts::Private::deserialize(const std::string& serialized) const
157 {
158         ContactUMap contacts;
159         std::stringstream ss{serialized, std::ios_base::in};
160         std::string line;
161 
162         while (getline (ss, line)) {
163 
164                 const auto parts = Mu::split (line, Separator);
165                 if (G_UNLIKELY(parts.size() != 6)) {
166                         g_warning ("error: '%s'", line.c_str());
167                         continue;
168                 }
169 
170                 ContactInfo ci(std::move(parts[0]), // full address
171                                parts[1], // email
172                                std::move(parts[2]), // name
173                                parts[3][0] == '1' ? true : false, // personal
174                                (time_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // last_seen
175                                (std::size_t)g_ascii_strtoll(parts[5].c_str(), NULL, 10)); // freq
176 
177                 contacts.emplace(std::move(parts[1]), std::move(ci));
178         }
179 
180         return contacts;
181 }
182 
183 
Contacts(const std::string & serialized,const StringVec & personal)184 Contacts::Contacts (const std::string& serialized, const StringVec& personal) :
185         priv_{std::make_unique<Private>(serialized, personal)}
186 {}
187 
188 Contacts::~Contacts() = default;
189 
190 std::string
serialize() const191 Contacts::serialize() const
192 {
193         std::lock_guard<std::mutex> l_{priv_->mtx_};
194         std::string s;
195 
196         for (auto& item: priv_->contacts_) {
197                 const auto& ci{item.second};
198                 s += Mu::format("%s%s"
199                                  "%s%s"
200                                  "%s%s"
201                                  "%d%s"
202                                  "%" G_GINT64_FORMAT "%s"
203                                  "%" G_GINT64_FORMAT "\n",
204                                  ci.full_address.c_str(), Separator,
205                                  ci.email.c_str(), Separator,
206                                  ci.name.c_str(), Separator,
207                                  ci.personal ? 1 : 0, Separator,
208                                  (gint64)ci.last_seen, Separator,
209                                  (gint64)ci.freq);
210         }
211 
212         return s;
213 }
214 
215 const ContactInfo
add(ContactInfo && ci)216 Contacts::add (ContactInfo&& ci)
217 {
218         std::lock_guard<std::mutex> l_{priv_->mtx_};
219 
220         auto it = priv_->contacts_.find(ci.email);
221 
222         if (it == priv_->contacts_.end()) { // completely new contact
223 
224                 ci.name         = Mu::remove_ctrl(ci.name);
225                 ci.full_address = remove_ctrl(ci.full_address);
226 
227                 auto email{ci.email};
228                 return priv_->contacts_.emplace(ContactUMap::value_type(email, std::move(ci))).first->second;
229 
230         } else { // existing contact.
231                 auto& ci_existing{it->second};
232                 ++ci_existing.freq;
233 
234                 if (ci.last_seen > ci_existing.last_seen) { // update.
235 
236                         ci_existing.email        = std::move(ci.email);
237                         ci_existing.name         = Mu::remove_ctrl(ci.name);
238                         ci_existing.full_address = Mu::remove_ctrl(ci.full_address);
239 
240                         ci_existing.tstamp       = g_get_monotonic_time();
241                         ci_existing.last_seen    = ci.last_seen;
242                 }
243 
244                 return ci;
245         }
246 }
247 
248 
249 const ContactInfo*
_find(const std::string & email) const250 Contacts::_find (const std::string& email) const
251 {
252         std::lock_guard<std::mutex> l_{priv_->mtx_};
253 
254         const auto it = priv_->contacts_.find(email);
255         if (it == priv_->contacts_.end())
256                 return {};
257         else
258                 return &it->second;
259 }
260 
261 
262 void
clear()263 Contacts::clear()
264 {
265         std::lock_guard<std::mutex> l_{priv_->mtx_};
266 
267         priv_->contacts_.clear();
268 }
269 
270 
271 std::size_t
size() const272 Contacts::size() const
273 {
274         std::lock_guard<std::mutex> l_{priv_->mtx_};
275 
276         return priv_->contacts_.size();
277 }
278 
279 
280 void
for_each(const EachContactFunc & each_contact) const281 Contacts::for_each(const EachContactFunc& each_contact) const
282 {
283         std::lock_guard<std::mutex> l_{priv_->mtx_};
284 
285         if (!each_contact)
286                 return; // nothing to do
287 
288         // first sort them for 'rank'
289         ContactSet sorted;
290         for (const auto& item: priv_->contacts_)
291                 sorted.emplace(item.second);
292 
293         for (const auto& ci: sorted)
294                 each_contact (ci);
295 }
296 
297 bool
is_personal(const std::string & addr) const298 Contacts::is_personal(const std::string& addr) const
299 {
300         for (auto&& p: priv_->personal_plain_)
301                 if (g_ascii_strcasecmp(addr.c_str(), p.c_str()) == 0)
302                         return true;
303 
304         for (auto&& rx: priv_->personal_rx_) {
305                 std::smatch m; // perhaps cache addr in personal_plain_?
306                 if (std::regex_match(addr, m, rx))
307                         return true;
308         }
309 
310         return false;
311 }
312 
313 
314 #ifdef BUILD_TESTS
315 /*
316   * Tests.
317  *
318  */
319 
320 #include "test-mu-common.hh"
321 
322 static void
test_mu_contacts_01()323 test_mu_contacts_01()
324 {
325         Mu::Contacts contacts ("");
326 
327         g_assert_true (contacts.empty());
328         g_assert_cmpuint (contacts.size(), ==, 0);
329 
330         contacts.add(Mu::ContactInfo ("Foo <foo.bar@example.com>",
331                                       "foo.bar@example.com", "Foo", false, 12345));
332         g_assert_false (contacts.empty());
333         g_assert_cmpuint (contacts.size(), ==, 1);
334 
335         contacts.add(Mu::ContactInfo ("Cuux <cuux.fnorb@example.com>",
336                                       "cuux@example.com", "Cuux", false, 54321));
337 
338         g_assert_cmpuint (contacts.size(), ==, 2);
339 
340         contacts.add(Mu::ContactInfo ("foo.bar@example.com",
341                                       "foo.bar@example.com", "Foo", false, 77777));
342         g_assert_cmpuint (contacts.size(), ==, 2);
343 
344         contacts.add(Mu::ContactInfo ("Foo.Bar@Example.Com",
345                                       "Foo.Bar@Example.Com", "Foo", false, 88888));
346         g_assert_cmpuint (contacts.size(), ==, 2);
347         // note: replaces first.
348 
349         {
350                 const auto info = contacts._find("bla@example.com");
351                 g_assert_false (info);
352         }
353 
354         {
355                 const auto info = contacts._find("foo.BAR@example.com");
356                 g_assert_true (info);
357 
358                 g_assert_cmpstr(info->email.c_str(), ==, "Foo.Bar@Example.Com");
359         }
360 
361         contacts.clear();
362         g_assert_true (contacts.empty());
363         g_assert_cmpuint (contacts.size(), ==, 0);
364 }
365 
366 static void
test_mu_contacts_02()367 test_mu_contacts_02()
368 {
369         Mu::StringVec personal = {
370                 "foo@example.com",
371                 "bar@cuux.org",
372                 "/bar-.*@fnorb.f./"
373         };
374         Mu::Contacts contacts{"", personal};
375 
376         g_assert_true (contacts.is_personal("foo@example.com"));
377         g_assert_true (contacts.is_personal("Bar@CuuX.orG"));
378         g_assert_true (contacts.is_personal("bar-123abc@fnorb.fi"));
379         g_assert_true (contacts.is_personal("bar-zzz@fnorb.fr"));
380 
381         g_assert_false (contacts.is_personal("foo@bar.com"));
382         g_assert_false (contacts.is_personal("BÂr@CuuX.orG"));
383         g_assert_false (contacts.is_personal("bar@fnorb.fi"));
384         g_assert_false (contacts.is_personal("bar-zzz@fnorb.xr"));
385 }
386 
387 
388 
389 int
main(int argc,char * argv[])390 main (int argc, char *argv[])
391 {
392         g_test_init (&argc, &argv, NULL);
393 
394         g_test_add_func ("/mu-contacts/01", test_mu_contacts_01);
395         g_test_add_func ("/mu-contacts/02", test_mu_contacts_02);
396 
397         g_log_set_handler (NULL,
398                            (GLogLevelFlags)
399                            (G_LOG_LEVEL_MASK | G_LOG_FLAG_FATAL| G_LOG_FLAG_RECURSION),
400                            (GLogFunc)black_hole, NULL);
401 
402         return g_test_run ();
403 }
404 #endif /*BUILD_TESTS*/
405