1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_disk_set_standard_lookup.c 201083 2009-12-28 02:09:57Z kientzle $");
28 
29 #ifdef HAVE_SYS_TYPES_H
30 #include <sys/types.h>
31 #endif
32 #ifdef HAVE_ERRNO_H
33 #include <errno.h>
34 #endif
35 #ifdef HAVE_GRP_H
36 #include <grp.h>
37 #endif
38 #ifdef HAVE_PWD_H
39 #include <pwd.h>
40 #endif
41 #ifdef HAVE_STDLIB_H
42 #include <stdlib.h>
43 #endif
44 #ifdef HAVE_STRING_H
45 #include <string.h>
46 #endif
47 
48 #include "archive.h"
49 #include "archive_private.h"
50 #include "archive_read_private.h"
51 #include "archive_write_disk_private.h"
52 
/*
 * One slot of a name->id cache.  The caches are zero-filled when they
 * are created, so a slot whose name is NULL holds no entry.
 */
struct bucket {
	char	*name;	/* strdup()'d user/group name, or NULL if empty */
	int	 hash;	/* hash(name); compared before the strcmp() */
	id_t	 id;	/* uid or gid recorded for name */
};
58 
/* Number of slots in each cache; the uid and gid caches are separate arrays. */
static const size_t cache_size = 127;
/* String hash used to choose a cache slot. */
static unsigned int	hash(const char *);
/*
 * Lookup callbacks: (cache array, name, fallback id) -> id to use.
 * The fallback id is returned when the name is empty or cannot be resolved.
 */
static int64_t	lookup_gid(void *, const char *gname, int64_t);
static int64_t	lookup_uid(void *, const char *uname, int64_t);
/* Frees a cache array together with every name stored in it. */
static void	cleanup(void *);
64 
65 /*
66  * Installs functions that use getpwnam()/getgrnam()---along with
67  * a simple cache to accelerate such lookups---into the archive_write_disk
68  * object.  This is in a separate file because getpwnam()/getgrnam()
69  * can pull in a LOT of library code (including NIS/LDAP functions, which
70  * pull in DNS resolvers, etc).  This can easily top 500kB, which makes
71  * it inappropriate for some space-constrained applications.
72  *
73  * Applications that are size-sensitive may want to just use the
74  * real default functions (defined in archive_write_disk.c) that just
75  * use the uid/gid without the lookup.  Or define your own custom functions
76  * if you prefer.
77  *
78  * TODO: Replace these hash tables with simpler move-to-front LRU
79  * lists with a bounded size (128 items?).  The hash is a bit faster,
80  * but has a bad pathology in which it thrashes a single bucket.  Even
81  * walking a list of 128 items is a lot faster than calling
82  * getpwnam()!
83  */
84 int
85 archive_write_disk_set_standard_lookup(struct archive *a)
86 {
87 	struct bucket *ucache = malloc(cache_size * sizeof(struct bucket));
88 	struct bucket *gcache = malloc(cache_size * sizeof(struct bucket));
89 	if (ucache == NULL || gcache == NULL) {
90 		free(ucache);
91 		free(gcache);
92 		return (ARCHIVE_FATAL);
93 	}
94 	memset(ucache, 0, cache_size * sizeof(struct bucket));
95 	memset(gcache, 0, cache_size * sizeof(struct bucket));
96 	archive_write_disk_set_group_lookup(a, gcache, lookup_gid, cleanup);
97 	archive_write_disk_set_user_lookup(a, ucache, lookup_uid, cleanup);
98 	return (ARCHIVE_OK);
99 }
100 
101 static int64_t
102 lookup_gid(void *private_data, const char *gname, int64_t gid)
103 {
104 	int h;
105 	struct bucket *b;
106 	struct bucket *gcache = (struct bucket *)private_data;
107 
108 	/* If no gname, just use the gid provided. */
109 	if (gname == NULL || *gname == '\0')
110 		return (gid);
111 
112 	/* Try to find gname in the cache. */
113 	h = hash(gname);
114 	b = &gcache[h % cache_size ];
115 	if (b->name != NULL && b->hash == h && strcmp(gname, b->name) == 0)
116 		return ((gid_t)b->id);
117 
118 	/* Free the cache slot for a new entry. */
119 	if (b->name != NULL)
120 		free(b->name);
121 	b->name = strdup(gname);
122 	/* Note: If strdup fails, that's okay; we just won't cache. */
123 	b->hash = h;
124 #if HAVE_GRP_H
125 #  if HAVE_GETGRNAM_R
126 	{
127 		char _buffer[128];
128 		size_t bufsize = 128;
129 		char *buffer = _buffer;
130 		char *allocated = NULL;
131 		struct group	grent, *result;
132 		int r;
133 
134 		for (;;) {
135 			result = &grent; /* Old getgrnam_r ignores last arg. */
136 			r = getgrnam_r(gname, &grent, buffer, bufsize, &result);
137 			if (r == 0)
138 				break;
139 			if (r != ERANGE)
140 				break;
141 			bufsize *= 2;
142 			free(allocated);
143 			allocated = malloc(bufsize);
144 			if (allocated == NULL)
145 				break;
146 			buffer = allocated;
147 		}
148 		if (result != NULL)
149 			gid = result->gr_gid;
150 		free(allocated);
151 	}
152 #  else /* HAVE_GETGRNAM_R */
153 	{
154 		struct group *result;
155 
156 		result = getgrnam(gname);
157 		if (result != NULL)
158 			gid = result->gr_gid;
159 	}
160 #  endif /* HAVE_GETGRNAM_R */
161 #elif defined(_WIN32) && !defined(__CYGWIN__)
162 	/* TODO: do a gname->gid lookup for Windows. */
163 #else
164 	#error No way to perform gid lookups on this platform
165 #endif
166 	b->id = (gid_t)gid;
167 
168 	return (gid);
169 }
170 
171 static int64_t
172 lookup_uid(void *private_data, const char *uname, int64_t uid)
173 {
174 	int h;
175 	struct bucket *b;
176 	struct bucket *ucache = (struct bucket *)private_data;
177 
178 	/* If no uname, just use the uid provided. */
179 	if (uname == NULL || *uname == '\0')
180 		return (uid);
181 
182 	/* Try to find uname in the cache. */
183 	h = hash(uname);
184 	b = &ucache[h % cache_size ];
185 	if (b->name != NULL && b->hash == h && strcmp(uname, b->name) == 0)
186 		return ((uid_t)b->id);
187 
188 	/* Free the cache slot for a new entry. */
189 	if (b->name != NULL)
190 		free(b->name);
191 	b->name = strdup(uname);
192 	/* Note: If strdup fails, that's okay; we just won't cache. */
193 	b->hash = h;
194 #if HAVE_PWD_H
195 #  if HAVE_GETPWNAM_R
196 	{
197 		char _buffer[128];
198 		size_t bufsize = 128;
199 		char *buffer = _buffer;
200 		char *allocated = NULL;
201 		struct passwd	pwent, *result;
202 		int r;
203 
204 		for (;;) {
205 			result = &pwent; /* Old getpwnam_r ignores last arg. */
206 			r = getpwnam_r(uname, &pwent, buffer, bufsize, &result);
207 			if (r == 0)
208 				break;
209 			if (r != ERANGE)
210 				break;
211 			bufsize *= 2;
212 			free(allocated);
213 			allocated = malloc(bufsize);
214 			if (allocated == NULL)
215 				break;
216 			buffer = allocated;
217 		}
218 		if (result != NULL)
219 			uid = result->pw_uid;
220 		free(allocated);
221 	}
222 #  else /* HAVE_GETPWNAM_R */
223 	{
224 		struct passwd *result;
225 
226 		result = getpwnam(uname);
227 		if (result != NULL)
228 			uid = result->pw_uid;
229 	}
230 #endif	/* HAVE_GETPWNAM_R */
231 #elif defined(_WIN32) && !defined(__CYGWIN__)
232 	/* TODO: do a uname->uid lookup for Windows. */
233 #else
234 	#error No way to look up uids on this platform
235 #endif
236 	b->id = (uid_t)uid;
237 
238 	return (uid);
239 }
240 
241 static void
242 cleanup(void *private)
243 {
244 	size_t i;
245 	struct bucket *cache = (struct bucket *)private;
246 
247 	for (i = 0; i < cache_size; i++)
248 		free(cache[i].name);
249 	free(cache);
250 }
251 
252 
/*
 * Hash a NUL-terminated string with the PJW algorithm (the variant ELF
 * uses for symbol names).  Each character shifts the accumulator left by
 * four bits; whenever the top nibble becomes non-zero it is folded back
 * into the low bits and cleared, so the result never exceeds 0x0FFFFFFF.
 */
static unsigned int
hash(const char *p)
{
	unsigned int acc = 0;
	unsigned int top;

	for (; *p != '\0'; p++) {
		acc = (acc << 4) + *p;
		top = acc & 0xF0000000;
		if (top == 0)
			continue;
		/* Fold the overflowing nibble back in, then drop it. */
		acc ^= top >> 24;
		acc &= 0x0FFFFFFF;
	}
	return acc;
}
268