1 /*
2  * ufdbHostnames.c - URLfilterDB
3  *
4  * ufdbGuard is copyrighted (C) 2005-2020 by URLfilterDB with all rights reserved.
5  *
6  * Cached IP and hostname lookups.
7  *
8  * Parts of ufdbGuard are based on squidGuard.
9  * This module is NOT based on squidGuard.
10  *
11  * RCS $Id: ufdbHostnames.c,v 1.10 2020/05/31 21:22:51 root Exp root $
12  */
13 
14 #include "ufdb.h"
15 #include "ufdbHostnames.h"
16 #include "ufdblib.h"
17 #include "ufdblocks.h"
18 #include "ufdbHashtable.h"
19 
20 #include <time.h>
21 #include <netinet/in.h>
22 #include <sys/socket.h>
23 #include <netdb.h>
24 #include <arpa/inet.h>
25 
26 #ifdef __cplusplus
27 extern "C" {
28 #endif
29 
30 static struct UFDBhashtable * IPcache = NULL;
31 static ufdb_mutex  IPcacheLock = ufdb_mutex_initializer;
32 static time_t      lastIPcachePurgeTime = 0;
33 static time_t      OldestInIPcache = 0;
34 
35 
/* One cached reverse-lookup result; stored as the value in the IPcache hash table. */
struct IPcacheElem {
   char * host;			/* heap copy of the hostname recorded for the IP */
   int    status;		/* lookup outcome handed back to callers: non-zero means host is usable */
   char * copyOfIP;		/* heap copy of the IP string (duplicates the hash table key) */
   time_t lastAccess;		/* time of the last add or successful lookup; drives purging */
};
42 
43 
/*
 * Hash callback for the IP cache: hashes a NUL-terminated string key.
 *
 * The accumulator starts at 19 and for every byte c becomes
 * (accumulator * 13) XOR (c * 17).  The most significant bit is cleared,
 * so the result is always in the range 0 .. 0x7FFFFFFF.
 */
unsigned int IPhashFunc( const void * key )
{
   const unsigned char * p;
   unsigned int          hash = 19;

   for (p = (const unsigned char *) key;  *p != '\0';  p++)
   {
      unsigned int c = (unsigned int) *p;

      hash = (hash * 13)  ^  ((c << 4) + c);		/* (c << 4) + c  ==  c * 17 */
   }

   return hash & 0x7FFFFFFF;
}
58 
59 
/*
 * Key-equality callback for the IP cache.
 * Both keys are treated as NUL-terminated strings.
 * Returns 1 when the strings are identical and 0 otherwise.
 */
int IPkeyEqFunc( const void * av, const void * bv )
{
   const char * left  = (const char *) av;
   const char * right = (const char *) bv;

   return strcmp( left, right ) == 0;
}
69 
70 
71 UFDB_GCC_INLINE
findIPinCache(char * ip,char * host,int * status)72 static int findIPinCache( char * ip, char * host, int * status )
73 {
74    struct IPcacheElem * elem;
75 
76    if (IPcache == NULL)
77       return 0;
78 
79    elem = (struct IPcacheElem *) UFDBsearchHashtable( IPcache, ip, 0 );
80    if (elem == NULL)
81       return 0;
82 
83    /* Note that we do not bother here about ufdbGV.databaseLoadTime since cached IP addresses do not change */
84 
85    elem->lastAccess = time( NULL );
86    *status = elem->status;
87    if (*status)
88       strcpy( host, elem->host );
89 
90    return 1;
91 }
92 
93 
purgeIPcache(const void * key,const void * value)94 static int purgeIPcache( const void * key, const void * value )
95 {
96    struct IPcacheElem * elem;
97 
98    elem = (struct IPcacheElem *) value;
99    if (elem->lastAccess < OldestInIPcache)
100    {
101       if (ufdbGV.debug > 1)
102          ufdbLogMessage( "      key %s purged", (char*) key );
103       ufdbFree( elem->host );
104       ufdbFree( elem->copyOfIP );
105       /* caller frees key and value */
106       return 1;
107    }
108 
109    return 0;
110 }
111 
112 
addIPtoCache(char * ip,const char * host,int status)113 static void addIPtoCache( char * ip, const char * host, int status )
114 {
115    struct IPcacheElem * elem;
116    time_t               now;
117 
118    now = time( NULL );
119    if (IPcache == NULL)
120    {
121       ufdb_mutex_lock( &IPcacheLock );
122       if (IPcache == NULL)
123       {
124 	 IPcache = UFDBcreateHashtable( 317, IPhashFunc, IPkeyEqFunc );
125 	 lastIPcachePurgeTime = now;
126       }
127       ufdb_mutex_unlock( &IPcacheLock );
128    }
129 
130    if (ufdbGV.debug > 1)
131       ufdbLogMessage( "      addIPtoCache: IP %s  host %s  status %d", ip, host, status );
132 
133    elem = (struct IPcacheElem *) UFDBsearchHashtable( IPcache, ip, 1 );
134    if (elem != NULL)
135    {
136       if (ufdbGV.debug > 1)
137 	 ufdbLogMessage( "      addIPtoCache: IP %s already in cache.  updated status to %d", ip, status );
138       elem->status = status;
139       elem->lastAccess = now;
140       return;
141    }
142 
143    elem = (struct IPcacheElem *) ufdbMalloc( sizeof(struct IPcacheElem) );
144    elem->host = ufdbStrdup( host );
145    elem->status = status;
146    elem->copyOfIP = ufdbStrdup( ip );	/* TODO: this is the same as the key ?!?! */
147    elem->lastAccess = now;
148 
149    /* before calling UFDBinsertHashtable, check if the cache needs a purge and
150     * take advantage of the hash table already being locked.
151     */
152    if ((IPcache->nEntries > 9500  &&  now > lastIPcachePurgeTime + 60*60)  ||  (now > lastIPcachePurgeTime + 3*60*60))
153    {
154       int oldnEntries;
155 
156       if (ufdbGV.debug > 1)
157 	 ufdbLogMessage( "      addIPtoCache: going to purge the IP cache.  "
158                          "nEntries %d  now %ld  lastIPcachePurgeTime %ld  diff %ld",
159 			 IPcache->nEntries, now, lastIPcachePurgeTime, now - lastIPcachePurgeTime );
160 
161       oldnEntries = IPcache->nEntries;
162       if (oldnEntries > 9500)
163 	 OldestInIPcache = now - 2 * 60 * 60;		/* 2 hours */
164       else
165 	 OldestInIPcache = now - 3 * 60 * 60;		/* 3 hours */
166       UFDBpurgeHashtable( IPcache, purgeIPcache );
167 
168       lastIPcachePurgeTime = now;
169       ufdbLogMessage( "addIPtoCache: IP cache %08lx was purged: %d of %d entries were removed",
170                       (unsigned long) IPcache, oldnEntries - IPcache->nEntries, oldnEntries );
171    }
172 
173    UFDBinsertHashtable( IPcache, ufdbStrdup(ip), elem, 1 );
174 }
175 
176 
/*
 * IPv6 variant of UFDBfindCorrectHostNameForIP(): reverse-resolve ip into host
 * and doublecheck the result with a forward lookup of that hostname.
 *
 *    ip    textual IPv6 address
 *    host  output buffer; getnameinfo() is told it is 1023 bytes, so it must be
 *          at least that large
 *
 * Returns 1 when the reverse hostname resolves back to the same address and
 * 0 otherwise (invalid address, no reverse name, or no matching forward address).
 * Every outcome is stored in the IP cache.
 */
static int UFDBfindCorrectHostNameForIPv6( char * ip, char * host )
{
   struct sockaddr_in6 sa;
   struct addrinfo     addrinfo_hints;
   struct addrinfo *   addrinfo;
   struct addrinfo *   list;
   int ret;

   /* zero the whole address so that sin6_flowinfo and sin6_scope_id are not left uninitialised */
   memset( &sa, 0, sizeof(sa) );
   sa.sin6_family = AF_INET6;
   sa.sin6_port = htons( 443 );
   if (inet_pton( AF_INET6, ip, &sa.sin6_addr ) != 1)
   {
      /* without a parsed address there is nothing meaningful to look up */
      if (ufdbGV.debug > 2)
         ufdbLogMessage( "      UFDBfindCorrectHostNameForIPv6: %s is not a valid IPv6 address", ip );
      addIPtoCache( ip, "unknown", 0 );
      return 0;
   }

   ret = getnameinfo( (struct sockaddr *) &sa, sizeof(struct sockaddr_in6), host, 1023, NULL, 0, NI_NAMEREQD );
   if (ret != 0)
   {
      if (ufdbGV.debug > 2)
         ufdbLogMessage( "      UFDBfindCorrectHostNameForIPv6: reverse lookup for IPv6 %s failed: %s",
                         ip, gai_strerror(ret) );
      addIPtoCache( ip, "unknown", 0 );
      return 0;
   }

   if (ufdbGV.debug > 2)
      ufdbLogMessage( "      UFDBfindCorrectHostNameForIPv6: reverse hostname for IPv6 %s is %s  - "
                      "going to doublecheck...", ip, host );

   /* all hint members that are not set explicitly must be zero / NULL */
   addrinfo = NULL;
   memset( &addrinfo_hints, 0, sizeof(addrinfo_hints) );
   addrinfo_hints.ai_family = AF_INET6;
   addrinfo_hints.ai_socktype = SOCK_STREAM;
   addrinfo_hints.ai_protocol = IPPROTO_TCP;

   ret = getaddrinfo( host, NULL, &addrinfo_hints, &addrinfo );
   if (ret != 0)
   {
      if (ufdbGV.debug > 2)
         ufdbLogMessage( "      UFDBfindCorrectHostNameForIPv6: cannot resolve hostname %s (reverse of IPv6 %s): %s", host, ip, gai_strerror(ret) );
      addIPtoCache( ip, host, 0 );
      return 0;
   }

   /* got a list of forward addresses... See if one matches with the original IP */
   ret = 0;
   for (list = addrinfo;  list != NULL;  list = list->ai_next)
   {
      if (list->ai_family == AF_INET6)
      {
         if (memcmp( (void*) &((struct sockaddr_in6 *)list->ai_addr)->sin6_addr, (void*) &sa.sin6_addr, sizeof(sa.sin6_addr) ) == 0)
         {
            ret = 1;
            if (ufdbGV.debug)
            {
               char buf[INET6_ADDRSTRLEN];
               ufdbLogMessage( "      UFDBfindCorrectHostNameForIPv6: reverse address match for %s and %s",
                               host, inet_ntop(AF_INET6,&((struct sockaddr_in6 *)list->ai_addr)->sin6_addr,buf,sizeof(buf) ) );
            }
            break;
         }
      }
   }
   addIPtoCache( ip, host, ret );

   if (ufdbGV.debug > 2)
      ufdbLogMessage( "      UFDBfindCorrectHostNameForIPv6: IPv6 %s %s a correct and verified reverse hostname %s", ip, ret ? "has" : "does not have", host );

   freeaddrinfo( addrinfo );
   return ret;
}
248 
249 
/*
 * Given an IP address, try to get the FQDN.
 * The result is doublechecked because not all reverse names are correct.
 * E.g. proxy.org is 174.120.113.29 and
 * 174.120.113.29 is 1d.71.78ae.static.theplanet.com
 *
 * The IP cache is consulted first and every new outcome is stored in it.
 * An ip containing ':' is handed to the IPv6 variant.
 *
 * parameters:
 *    worker  worker number, only used as prefix in log messages
 *    ip      textual IPv4 or IPv6 address
 *    host    output buffer; getnameinfo() is told it is 1023 bytes, so it must
 *            be at least that large
 *
 * return values:
 *    0  not found (also for an unparsable address or a reverse name that does
 *       not resolve back to ip)
 *    1  found,    host parameter is assigned
 */
int UFDBfindCorrectHostNameForIP( int worker, char * ip, char * host )
{
   struct sockaddr_in  sa;
   struct addrinfo     addrinfo_hints;
   struct addrinfo *   addrinfo;
   struct addrinfo *   list;
   int ret;

   if (findIPinCache( ip, host, &ret ))
      return ret;

   if (strchr( ip, ':' ) != NULL)
      return UFDBfindCorrectHostNameForIPv6( ip, host );

   /* zero the whole address so that no member is passed to getnameinfo() uninitialised */
   memset( &sa, 0, sizeof(sa) );
   sa.sin_family = AF_INET;
   sa.sin_port = htons( 443 );
   if (inet_pton( AF_INET, ip, &sa.sin_addr ) != 1)
   {
      /* without a parsed address there is nothing meaningful to look up */
      if (ufdbGV.debug > 2)
         ufdbLogMessage( "W%03d: UFDBfindCorrectHostNameForIP: %s is not a valid IPv4 address",
                         worker, ip );
      addIPtoCache( ip, "unknown", 0 );
      return 0;
   }

   ret = getnameinfo( (struct sockaddr *) &sa, sizeof(struct sockaddr_in), host, 1023, NULL, 0, NI_NAMEREQD );
   if (ret != 0)
   {
      if (ufdbGV.debug > 2)
         ufdbLogMessage( "W%03d: UFDBfindCorrectHostNameForIP: reverse lookup for IP %s failed: %s",
                         worker, ip, gai_strerror(ret) );
      addIPtoCache( ip, "unknown", 0 );
      return 0;
   }

   if (ufdbGV.debug > 2)
      ufdbLogMessage( "W%03d: UFDBfindCorrectHostNameForIP: reverse hostname for IP %s is %s  - "
                      "going to doublecheck...", worker, ip, host );

   /* all hint members that are not set explicitly must be zero / NULL */
   addrinfo = NULL;
   memset( &addrinfo_hints, 0, sizeof(addrinfo_hints) );
   addrinfo_hints.ai_family = AF_INET;
   addrinfo_hints.ai_socktype = SOCK_STREAM;
   addrinfo_hints.ai_protocol = IPPROTO_TCP;

   ret = getaddrinfo( host, NULL, &addrinfo_hints, &addrinfo );
   if (ret != 0)
   {
      if (ufdbGV.debug > 2)
         ufdbLogMessage( "W%03d: UFDBfindCorrectHostNameForIP: cannot resolve hostname %s (reverse of IP %s): %s",
                         worker, host, ip, gai_strerror(ret) );
      addIPtoCache( ip, host, 0 );
      return 0;
   }

   /* got a list of forward addresses... See if one matches with the original IP */
   ret = 0;
   for (list = addrinfo;  list != NULL;  list = list->ai_next)
   {
      if (list->ai_family == AF_INET)
      {
         if (memcmp( (void*) &((struct sockaddr_in *)list->ai_addr)->sin_addr,
                     (void*) &sa.sin_addr, sizeof(sa.sin_addr) ) == 0)
         {
            ret = 1;
            if (ufdbGV.debug)
            {
               char buf[INET_ADDRSTRLEN];
               ufdbLogMessage( "W%03d: UFDBfindCorrectHostNameForIP: reverse address match for %s and %s",
                               worker, host,
                               inet_ntop(AF_INET,&((struct sockaddr_in *)list->ai_addr)->sin_addr,buf,sizeof(buf) ) );
            }
            break;
         }
      }
   }
   addIPtoCache( ip, host, ret );

   if (ufdbGV.debug > 2)
      ufdbLogMessage( "W%03d: UFDBfindCorrectHostNameForIP: IP %s %s a correct and verified reverse hostname %s",
                      worker, ip, ret ? "has" : "does not have", host );

   freeaddrinfo( addrinfo );
   return ret;
}
341 
342 
343 #ifdef __cplusplus
344 }
345 #endif
346