1 /*
2 * string operations
3 */
4
5 #include <stdio.h>
6 #include <ctype.h>
7 #include <sys/types.h>
8 #include <string.h>
9 #include "libc.h"
10 #include "news.h"
11
/*
 * Return 1 if s contains a blank or a tab anywhere, 0 otherwise.
 */
int
spacein(s)
register const char *s;
{
	/* one pass looking for either character */
	return strpbrk(s, " \t") != NULL;
}
18
/*
 * Simple tr: overwrite every occurrence of `from' in the string `s'
 * with `to', in place.  The terminating NUL is never examined as a
 * candidate, so the string's length does not change.
 */
void
stranslit(register char *s, register char from, register char to)
{
	while (*s != '\0') {
		if (*s == from)
			*s = to;
		s++;
	}
}
27
/*
 * Return a freshly saved copy (via strsvto(), hence strsave()) of the
 * first word in "tokens".  A word ends at the first space, or at the
 * end of the string if there is no space.
 */
char *
first(tokens)
char *tokens;
{
	char *word;

	word = strsvto(tokens, ' ');
	return word;
}
38
39 /*
40 * Turn a newsgroup name into a file name, in place.
41 */
42 void
mkfilenm(ng)43 mkfilenm(ng)
44 register char *ng;
45 {
46 stranslit(ng, NGDELIM, FNDELIM);
47 }
48
/*
 * Trim a trailing newline: cut s off at its last newline, if it has one.
 * Note that the cut is made at the LAST '\n' wherever it is, so anything
 * following that newline is dropped too.  Returns s.
 */
char *
trim(s)
char *s;
{
	register char *p = s + strlen(s);	/* start at the terminator */

	/* walk backwards until a newline is found or s is exhausted */
	while (p > s) {
		if (*--p == '\n') {
			*p = '\0';
			break;
		}
	}
	return s;
}
61
/*
 * Skip any whitespace at *s: return a pointer to the first character
 * at or after s for which iswhite() is false.
 */
char *
skipsp(s)
register char *s;
{
	register char *p;

	for (p = s; iswhite(*p); p++)
		;
	return p;
}
70
71 char *
strsvto(s,c)72 strsvto(s, c) /* save s up to (but excluding) c */
73 char *s;
74 int c;
75 {
76 register char *endp, *copy;
77
78 STRCHR(s, c, endp); /* find interesting part's end of s */
79 if (endp != NULL)
80 *endp = '\0'; /* restored below */
81 copy = strsave(s); /* copy interesting substring of s */
82 if (endp != NULL)
83 *endp = c;
84 return copy;
85 }
86
/*
 * How many c's in s?  Returns the number of characters in s equal to c;
 * the terminating NUL is not counted.
 */
int
charcount(s, c)
register char *s;
register int c;
{
	register int hits = 0;
	register char *p;

	for (p = s; *p != '\0'; p++)
		if (*p == c)
			hits++;
	return hits;
}
104
/*
 * Return s, or "" if s is NULL.
 *
 * The empty string returned for NULL is a shared string literal, not
 * allocated memory: callers must neither free it nor write through it.
 * No allocation happens, so nothing can leak and the result is never NULL.
 */
char *
nullify(s)
register char *s;
{
	return (s == NULL? "": s);
}
111
112
113 /* hostname and path routines follow */
114
115
116 #define CHARSETWIDTH 8 /* bits per character */
117 #define CHARSETSIZE (1<<CHARSETWIDTH) /* 2^CHARSETWIDTH */
118
119 #define initishostchar() (setishostchar? 0: doinitishostchar())
120 /* These macros are both (currently) safe. */
121 /* If c is NUL, hostchar will be false, so don't test (optimisation: ==). */
122 #define nothostchar(c) (!hostchar(c) /* || (c) == '\0' */ )
123 /* True if c can be part of a hostname. False may mean c is NUL. */
124 #define hostchar(c) ishostchar[(c) & (CHARSETSIZE-1)]
125
126 static char ishostchar[CHARSETSIZE]; /* char. sets > Latin-1 are out of luck */
127 static int setishostchar = NO;
128
129 /*
130 * RFC 850 allows letters, digits, periods, and hyphens and specifically
131 * disallows blanks in hostnames.
132 */
133 STATIC
134 void
doinitishostchar()135 doinitishostchar()
136 {
137 if (!setishostchar) {
138 register char *p;
139 register int c;
140
141 setishostchar = YES;
142 for (c = 0, p = ishostchar; c < sizeof ishostchar; c++)
143 *p++ = isascii(c) && isalnum(c);
144 ishostchar['.'] = ishostchar['-'] = YES;
145 }
146 }
147
148 /*
149 * Return true iff any host in hosts appears in s, as per hostin().
150 * hosts are separated by non-hostname characters.
151 */
152 boolean
anyhostin(hosts,s)153 anyhostin(hosts, s)
154 char *hosts, *s;
155 {
156 register char *host = hosts;
157
158 while (*host != '\0') {
159 register char *delimp;
160 register int delim;
161 register boolean hostisin;
162
163 initishostchar();
164 while (nothostchar(*host) && *host != '\0')
165 ++host; /* skip leading delims */
166 if (*host == '\0') /* no more hosts */
167 break;
168 for (delimp = host; hostchar(*delimp); delimp++)
169 ; /* skip to next delim */
170 delim = *delimp; /* may be NUL */
171 *delimp = '\0'; /* terminate host */
172 hostisin = hostin(host, s);
173 *delimp = delim; /* restore hosts delimiter */
174 if (hostisin)
175 return YES;
176 host = delimp; /* advance to next host */
177 }
178 return NO;
179 }
180
181 /*
182 * Return pointer to the first byte after host in path, if any,
183 * with no characters from the alphabet of legal hostname characters
184 * immediately adjacent.
185 * This function is a profiling hot spot, so it has been optimised.
186 */
187 STATIC char *
findhost(host,path)188 findhost(host, path)
189 register char *host, *path;
190 {
191 register char *pathp, *nxpathp;
192 register int hostlen = strlen(host);
193
194 initishostchar();
195 for (pathp = path; ; pathp = nxpathp + 1) {
196 STRCHR(pathp, host[0], nxpathp); /* find plausible start */
197 if (nxpathp == NULL)
198 return NULL; /* path exhausted */
199 pathp = nxpathp;
200 if (STREQN(pathp, host, hostlen) &&
201 (pathp == path || nothostchar(pathp[-1])) &&
202 nothostchar(pathp[hostlen]))
203 return &pathp[hostlen];
204 }
205 }
206
207 /*
208 * Return true iff host appears in s, with no characters from the alphabet
209 * of legal hostname characters immediately adjacent.
210 */
211 boolean
hostin(host,s)212 hostin(host, s)
213 register char *host, *s;
214 {
215 return findhost(host, s) != NULL;
216 }
217
/*
 * Return the number of machines appearing in path, by counting the
 * trailing edge of each run of delimiters: a non-hostname character
 * that is followed either by a hostname character or by the end of
 * the string.
 * See anyhostin() for the rules, and the macros.
 */
int
hopcount(path)
register char *path;
{
	register int hops = 0;
	register char *p;

	initishostchar();
	for (p = path; *p != '\0'; p++) {
		if (hostchar(p[0]))
			continue;		/* inside a name */
		if (p[1] == '\0' || hostchar(p[1]))
			hops++;			/* last delimiter of a run */
	}
	return hops;
}
236
/*
 * Return the first machine name in path, i.e. the leading run of
 * hostname characters, or "(local)" if path consists of nothing but
 * hostname characters (only a user name: a local posting).
 *
 * The answer lives in storage owned by this function: it is released
 * at the start of the next call, so callers must not free it and must
 * copy it if they need it for longer.  Both outcomes now follow that
 * rule; previously the "(local)" answer was a separate allocation that
 * was never recorded and therefore never released.
 * path is modified briefly (its first delimiter is overwritten with NUL)
 * and restored before returning.
 */
char *
sendersite(path)
register char *path;
{
	register char *p;
	register int delim;
	static char *sender = NULL;

	initishostchar();
	nnfree(&sender);		/* free the last answer */
	for (p = path; hostchar(*p); p++)
		;
	if (*p == '\0') {		/* only a user name */
		/* a local posting; saved with strsave() like the other answer */
		sender = strsave("(local)");
	} else {
		delim = *p;
		*p = '\0';
		sender = strsave(path);	/* copy the first machine name */
		*p = delim;
	}
	return sender;
}
259
/*
 * Canonicalise rawpath:
 *  - NULL is treated as "";
 *  - the last component (actually the user name) is chopped, but the
 *    delimiter in front of it is kept;
 *  - if approved is non-NULL (moderated article), take the text after
 *    the '@' in approved (or, failing that, in sender) as a site name;
 *    if that site is found in the path, the path is cut just after the
 *    delimiter that follows the site.
 * Result is malloced memory (from strsave()).
 * This is also a profiling hot spot.
 *
 * NULL arguments are tested directly instead of going through nullify():
 * as written in this file nullify() allocates a new empty string for
 * NULL, which would be lost here on every such call.
 */
char *
canonpath(rawpath, approved, sender)
char *rawpath, *approved, *sender;
{
	register char *newpath = strsave(rawpath != NULL? rawpath: ""); /* costly */
	register char *p, *lastdelim = newpath, *site = NULL;

	initishostchar();
	for (p = newpath; *p != '\0'; ++p)
		if (nothostchar(*p))
			lastdelim = p + 1;	/* just past delim */
	*lastdelim = '\0';			/* omit user's name */

	if (approved != NULL) {			/* moderated article */
		STRCHR(approved, '@', site);
		/*
		 * Braces matter: STRCHR is a macro and may expand to more
		 * than one statement.  A NULL sender has no '@' to find.
		 */
		if (site == NULL && sender != NULL) {
			STRCHR(sender, '@', site);
		}
	}
	if (site != NULL) {
		p = findhost(site+1, newpath);
		if (p != NULL && *p != '\0')	/* delim after site? */
			p[1] = '\0';	/* terminate newpath after that delim */
	}
	return newpath;
}
292