1 /*
2  * string operations
3  */
4 
5 #include <stdio.h>
6 #include <ctype.h>
7 #include <sys/types.h>
8 #include <string.h>
9 #include "libc.h"
10 #include "news.h"
11 
/* is there a space or a tab anywhere in s? */
int
spacein(s)
register const char *s;
{
	return strpbrk(s, " \t") != NULL;
}
18 
/*
 * A minimal tr(1): overwrite every occurrence of `from' in the
 * NUL-terminated string `s' with `to', in place.
 */
void
stranslit(register char *s, register char from, register char to)
{
	while (*s != '\0') {
		if (*s == from)
			*s = to;
		s++;
	}
}
27 
/*
 * Hand back a saved copy (as made by strsvto()) of the leading word of
 * "tokens".  A word is ended by a space.
 */
char *
first(tokens)
char *tokens;
{
	char *word = strsvto(tokens, ' ');

	return word;
}
38 
39 /*
40  * Turn a newsgroup name into a file name, in place.
41  */
42 void
mkfilenm(ng)43 mkfilenm(ng)
44 register char *ng;
45 {
46 	stranslit(ng, NGDELIM, FNDELIM);
47 }
48 
/*
 * Chop s at its last newline, if it has one, and return s.
 * For a line with a single newline at the end, this removes that
 * trailing newline.
 */
char *
trim(s)
char *s;
{
	register char *lastnl = strrchr(s, '\n');

	if (lastnl == NULL)
		return s;		/* nothing to chop */
	*lastnl = '\0';
	return s;
}
61 
/* return a pointer to the first character at or after s that is not iswhite */
char *
skipsp(s)
register char *s;
{
	for (; iswhite(*s); s++)
		;
	return s;
}
70 
/*
 * Return a strsave()d copy of the part of s that precedes the c located
 * by STRCHR, or of all of s when STRCHR reports no c.
 * s is cut short at that c while it is being copied and then put back,
 * so s must be writable.
 */
char *
strsvto(s, c)
char *s;
int c;
{
	register char *stop, *saved;

	STRCHR(s, c, stop);		/* where does the wanted part end? */
	if (stop == NULL)
		return strsave(s);	/* no c: the whole string is wanted */

	*stop = '\0';			/* temporarily end s here */
	saved = strsave(s);
	*stop = c;			/* undo the truncation */
	return saved;
}
86 
/* count the characters in the string s that are equal to c */
int
charcount(s, c)
register char *s;
register int c;
{
	register int hits = 0;

	/* one plain pass; the terminating NUL itself is never counted */
	for (; *s != '\0'; s++)
		if (*s == c)
			hits++;
	return hits;
}
104 
/*
 * Return s unchanged, or "" if s is NULL, so that callers can treat a
 * missing string as an empty one.
 * The empty string is a constant, not freshly allocated memory: callers
 * cannot tell it apart from s, so an allocated copy could never be freed
 * and would leak on every NULL argument (and an allocation can fail,
 * handing back the very NULL this function exists to hide).
 * The result must therefore not be freed on the strength of this call,
 * and the "" must not be written through.
 */
char *
nullify(s)
register char *s;
{
	return (s == NULL? "": s);
}
111 
112 
113 /* hostname and path routines follow */
114 
115 
116 #define CHARSETWIDTH 8			/* bits per character */
117 #define CHARSETSIZE  (1<<CHARSETWIDTH)	/* 2^CHARSETWIDTH */
118 
119 #define initishostchar() (setishostchar? 0: doinitishostchar())
120 /* These macros are both (currently) safe. */
121 /* If c is NUL, hostchar will be false, so don't test (optimisation: ==). */
122 #define nothostchar(c) (!hostchar(c) /* || (c) == '\0' */ )
123 /* True if c can be part of a hostname.  False may mean c is NUL. */
124 #define hostchar(c) ishostchar[(c) & (CHARSETSIZE-1)]
125 
126 static char ishostchar[CHARSETSIZE];	/* char. sets > Latin-1 are out of luck */
127 static int setishostchar = NO;
128 
129 /*
130  * RFC 850 allows letters, digits, periods, and hyphens and specifically
131  * disallows blanks in hostnames.
132  */
133 STATIC
134 void
doinitishostchar()135 doinitishostchar()
136 {
137 	if (!setishostchar) {
138 		register char *p;
139 		register int c;
140 
141 		setishostchar = YES;
142 		for (c = 0, p = ishostchar; c < sizeof ishostchar; c++)
143 			*p++ = isascii(c) && isalnum(c);
144 		ishostchar['.'] = ishostchar['-'] = YES;
145 	}
146 }
147 
148 /*
149  * Return true iff any host in hosts appears in s, as per hostin().
150  * hosts are separated by non-hostname characters.
151  */
152 boolean
anyhostin(hosts,s)153 anyhostin(hosts, s)
154 char *hosts, *s;
155 {
156 	register char *host = hosts;
157 
158 	while (*host != '\0') {
159 		register char *delimp;
160 		register int delim;
161 		register boolean hostisin;
162 
163 		initishostchar();
164 		while (nothostchar(*host) && *host != '\0')
165 			++host;			/* skip leading delims */
166 		if (*host == '\0')		/* no more hosts */
167 			break;
168 		for (delimp = host; hostchar(*delimp); delimp++)
169 			;			/* skip to next delim */
170 		delim = *delimp;		/* may be NUL */
171 		*delimp = '\0';			/* terminate host */
172 		hostisin = hostin(host, s);
173 		*delimp = delim;		/* restore hosts delimiter */
174 		if (hostisin)
175 			return YES;
176 		host = delimp;			/* advance to next host */
177 	}
178 	return NO;
179 }
180 
181 /*
182  * Return pointer to the first byte after host in path, if any,
183  * with no characters from the alphabet of legal hostname characters
184  * immediately adjacent.
185  * This function is a profiling hot spot, so it has been optimised.
186  */
187 STATIC char *
findhost(host,path)188 findhost(host, path)
189 register char *host, *path;
190 {
191 	register char *pathp, *nxpathp;
192 	register int hostlen = strlen(host);
193 
194 	initishostchar();
195 	for (pathp = path; ; pathp = nxpathp + 1) {
196 		STRCHR(pathp, host[0], nxpathp);	/* find plausible start */
197 		if (nxpathp == NULL)
198 			return NULL;		/* path exhausted */
199 		pathp = nxpathp;
200 		if (STREQN(pathp, host, hostlen) &&
201 		    (pathp == path || nothostchar(pathp[-1])) &&
202 		    nothostchar(pathp[hostlen]))
203 			return &pathp[hostlen];
204 	}
205 }
206 
207 /*
208  * Return true iff host appears in s, with no characters from the alphabet
209  * of legal hostname characters immediately adjacent.
210  */
211 boolean
hostin(host,s)212 hostin(host, s)
213 register char *host, *s;
214 {
215 	return findhost(host, s) != NULL;
216 }
217 
/*
 * Return the number of machines appearing in path, counted as the
 * number of delimiter characters that are followed either by a hostname
 * character or by the end of path.
 * See anyhostin() for the rules, and the macros.
 */
int
hopcount(path)
register char *path;
{
	register char *cp;
	register int hops = 0;

	initishostchar();
	for (cp = path; *cp != '\0'; cp++) {
		if (hostchar(cp[0]))
			continue;		/* inside a host name */
		/* cp[0] is a delimiter: is it the last of its run? */
		if (cp[1] == '\0' || hostchar(cp[1]))
			hops++;
	}
	return hops;
}
236 
/*
 * Return the first machine name in path, i.e. its leading run of
 * hostname characters, or "(local)" if path consists of nothing else
 * (only a user name: a local posting).
 * The answer is a strsave()d copy kept in a static pointer and handed to
 * nnfree() on the next call, so callers must not free it and must copy it
 * if they need it beyond the next call.  Both branches keep the answer in
 * that pointer; a copy returned without being recorded there would never
 * be released.
 * path is NUL-terminated in place while being copied and then restored,
 * so it must be writable.
 */
char *
sendersite(path)
register char *path;
{
	register char *p;
	static char *sender = NULL;

	initishostchar();
	nnfree(&sender);		/* free the last answer */
	for (p = path; hostchar(*p); p++)
		;			/* skip the first host name */
	if (*p == '\0')			/* only a user name */
		sender = strsave("(local)");	/* a local posting */
	else {
		register int delim = *p;

		*p = '\0';
		sender = strsave(path);	/* copy the first machine name */
		*p = delim;
	}
	return sender;
}
259 
/*
 * Canonicalise rawpath and return the result as a strsave()d string.
 * A NULL rawpath is treated as "".  Everything after the last
 * non-hostname character (the final component, actually the user name)
 * is removed, keeping that delimiter.  If approved is non-NULL, the site
 * is taken from after the '@' in approved, or failing that in sender
 * (user@host); if findhost() locates that site in the path with a
 * delimiter after it, the path is cut off just past that delimiter.
 * This is also a profiling hot spot.
 */
char *
canonpath(rawpath, approved, sender)
char *rawpath, *approved, *sender;
{
	register char *newpath, *cp, *cut, *at;

	newpath = strsave(nullify(rawpath));	/* costly */
	initishostchar();

	/* find the spot just past the last delimiter and end the path there */
	cut = newpath;
	for (cp = newpath; *cp != '\0'; ++cp)
		if (nothostchar(*cp))
			cut = cp + 1;
	*cut = '\0';				/* omit user's name */

	at = NULL;
	if (approved != NULL) {			/* moderated article */
		STRCHR(approved, '@', at);
		if (at == NULL)
			STRCHR(nullify(sender), '@', at);
	}
	if (at == NULL)
		return newpath;			/* no site to cut at */

	cp = findhost(at + 1, newpath);
	if (cp != NULL && *cp != '\0')		/* delim after site? */
		cp[1] = '\0';			/* end newpath after that delim */
	return newpath;
}
292