1 /* rt-process.c
2 */
3 /* This software is copyrighted as detailed in the LICENSE file. */
4 
5 
6 #include "EXTERN.h"
7 #include "common.h"
8 #include "list.h"
9 #include "intrp.h"
10 #include "trn.h"
11 #include "hash.h"
12 #include "cache.h"
13 #include "bits.h"
14 #include "final.h"
15 #include "ng.h"
16 #include "ngdata.h"
17 #include "nntpclient.h"
18 #include "datasrc.h"
19 #include "nntp.h"
20 #include "rcln.h"
21 #include "util.h"
22 #include "util2.h"
23 #include "kfile.h"
24 #include "rthread.h"
25 #include "rt-select.h"
26 #include "INTERN.h"
27 #include "rt-process.h"
28 #include "rt-process.ih"
29 
30 /* This depends on art being set to the current article number.
31 */
32 ARTICLE*
allocate_article(artnum)33 allocate_article(artnum)
34 ART_NUM artnum;
35 {
36     register ARTICLE* article;
37 
38     /* create an new article */
39     if (artnum >= absfirst)
40 	article = article_ptr(artnum);
41     else {
42 	article = (ARTICLE*)safemalloc(sizeof (ARTICLE));
43 	bzero((char*)article, sizeof (ARTICLE));
44 	article->flags |= AF_FAKE|AF_TMPMEM;
45     }
46     return article;
47 }
48 
/* Lower-case, in place, every upper-case character that follows the first
** '@' in msgid.  Text up to and including that '@' is left alone, and a
** string without any '@' is not modified at all.
*/
static void
fix_msgid(msgid)
char* msgid;
{
    register char* cp;

    if ((cp = index(msgid, '@')) == NULL)
        return;
    while (*++cp) {
        /* The <ctype.h> functions require a value representable as
        ** unsigned char (or EOF); a plain char holding an 8-bit byte may
        ** be negative, so convert before classifying. */
        unsigned char ch = (unsigned char)*cp;

        if (isupper(ch))
            *cp = (char)tolower(ch);    /* lower-case domain portion */
    }
}
63 
64 int
msgid_cmp(key,keylen,data)65 msgid_cmp(key, keylen, data)
66 char* key;
67 int keylen;
68 HASHDATUM data;
69 {
70     /* We already know that the lengths are equal, just compare the strings */
71     if (data.dat_len)
72 	return bcmp(key, data.dat_ptr, keylen);
73     return bcmp(key, ((ARTICLE*)data.dat_ptr)->msgid, keylen);
74 }
75 
76 SUBJECT* fake_had_subj; /* the fake-turned-real article had this subject */
77 
78 bool
valid_article(article)79 valid_article(article)
80 ARTICLE* article;
81 {
82     ARTICLE* ap;
83     ARTICLE* fake_ap;
84     char* msgid = article->msgid;
85     HASHDATUM data;
86 
87     if (msgid) {
88 	fix_msgid(msgid);
89 	data = hashfetch(msgid_hash, msgid, strlen(msgid));
90 	if (data.dat_len) {
91 	    safefree0(data.dat_ptr);
92 	    article->autofl = data.dat_len & (AUTO_SELS | AUTO_KILLS);
93 	    if ((data.dat_len & KF_AGE_MASK) == 0)
94 		article->autofl |= AUTO_OLD;
95 	    else
96 		kf_changethd_cnt++;
97 	    data.dat_len = 0;
98 	}
99 	if ((fake_ap = (ARTICLE*)data.dat_ptr) == NULL) {
100 	    data.dat_ptr = (char*)article;
101 	    hashstorelast(data);
102 	    fake_had_subj = NULL;
103 	    return TRUE;
104 	}
105 	if (fake_ap == article) {
106 	    fake_had_subj = NULL;
107 	    return TRUE;
108 	}
109 
110 	/* Whenever we replace a fake art with a real one, it's a lot of work
111 	** cleaning up the references.  Fortunately, this is not often. */
112 	if (fake_ap && (fake_ap->flags & AF_TMPMEM)) {
113 	    article->parent = fake_ap->parent;
114 	    article->child1 = fake_ap->child1;
115 	    article->sibling = fake_ap->sibling;
116 	    fake_had_subj = fake_ap->subj;
117 	    if (fake_ap->autofl) {
118 		article->autofl |= fake_ap->autofl;
119 		kf_state |= kfs_thread_change_set;
120 	    }
121 	    if (curr_artp == fake_ap) {
122 		curr_artp = article;
123 		curr_art = article_num(article);
124 	    }
125 	    if (recent_artp == fake_ap) {
126 		recent_artp = article;
127 		recent_art = article_num(article);
128 	    }
129 	    if ((ap = article->parent) != NULL) {
130 		if (ap->child1 == fake_ap)
131 		    ap->child1 = article;
132 		else {
133 		    ap = ap->child1;
134 		    /* This sibling-search code is duplicated below */
135 		    while (ap->sibling) {
136 			if (ap->sibling == fake_ap) {
137 			    ap->sibling = article;
138 			    break;
139 			}
140 			ap = ap->sibling;
141 		    }
142 		    /* End of slibling-search code */
143 		}
144 	    } else if (fake_had_subj) {
145 		register SUBJECT* sp = fake_had_subj;
146 		if ((ap = sp->thread) == fake_ap) {
147 		    do {
148 			sp->thread = article;
149 			sp = sp->thread_link;
150 		    } while (sp != fake_had_subj);
151 		} else {
152 		    /* This sibling-search code is duplicated above */
153 		    while (ap->sibling) {
154 			if (ap->sibling == fake_ap) {
155 			    ap->sibling = article;
156 			    break;
157 			}
158 			ap = ap->sibling;
159 		    }
160 		    /* End of slibling-search code */
161 		}
162 	    }
163 	    for (ap = article->child1; ap; ap = ap->sibling)
164 		ap->parent = article;
165 	    clear_article(fake_ap);
166 	    free((char*)fake_ap);
167 	    data.dat_ptr = (char*)article;
168 	    hashstorelast(data);
169 	    return TRUE;
170 	}
171     }
172     /* Forget about the duplicate message-id or bogus article. */
173     uncache_article(article,TRUE);
174     return FALSE;
175 }
176 
177 /* Take a message-id and see if we already know about it.  If so, return
178 ** the article, otherwise create a fake one.
179 */
180 ARTICLE*
get_article(msgid)181 get_article(msgid)
182 char* msgid;
183 {
184     register ARTICLE* article;
185     HASHDATUM data;
186 
187     fix_msgid(msgid);
188 
189     data = hashfetch(msgid_hash, msgid, strlen(msgid));
190     if (data.dat_len) {
191 	article = allocate_article(0);
192 	article->autofl = data.dat_len & (AUTO_SELS | AUTO_KILLS);
193 	if ((data.dat_len & KF_AGE_MASK) == 0)
194 	    article->autofl |= AUTO_OLD;
195 	else
196 	    kf_changethd_cnt++;
197 	article->msgid = data.dat_ptr;
198 	data.dat_ptr = (char*)article;
199 	data.dat_len = 0;
200 	hashstorelast(data);
201     }
202     else if (!(article = (ARTICLE*)data.dat_ptr)) {
203 	article = allocate_article(0);
204 	data.dat_ptr = (char*)article;
205 	article->msgid = savestr(msgid);
206 	hashstorelast(data);
207     }
208     return article;
209 }
210 
211 /* Take all the data we've accumulated about the article and shove it into
212 ** the article tree at the best place we can deduce.
213 */
214 void
thread_article(article,references)215 thread_article(article,references)
216 ARTICLE* article;
217 char* references;
218 {
219     register ARTICLE* ap;
220     register ARTICLE* prev;
221     register char* cp;
222     register char* end;
223     int chain_autofl = (article->autofl
224 	| (article->subj->articles? article->subj->articles->autofl : 0));
225     int thread_autofl, subj_autofl = 0;
226     int rethreading = article->flags & AF_THREADED;
227 
228     /* We're definitely not a fake anymore */
229     article->flags = (article->flags & ~AF_FAKE) | AF_THREADED;
230 
231     /* If the article was already part of an existing thread, unlink it
232     ** to try to put it in the best possible spot.
233     */
234     if (fake_had_subj) {
235 	ARTICLE* stopper;
236 	if (fake_had_subj->thread != article->subj->thread)
237 	    merge_threads(fake_had_subj, article->subj);
238 	/* Check for a real or shared-fake parent */
239 	ap = article->parent;
240 	while (ap && (ap->flags & AF_FAKE) && !ap->child1->sibling) {
241 	    prev = ap;
242 	    ap = ap->parent;
243 	}
244 	stopper = ap;
245 	unlink_child(article);
246 	/* We'll assume that this article has as good or better references
247 	** than the child that faked us initially.  Free the fake reference-
248 	** chain and process our references as usual.
249 	*/
250 	for (ap = article->parent; ap != stopper; ap = prev) {
251 	    unlink_child(ap);
252 	    prev = ap->parent;
253 	    ap->date = 0;
254 	    ap->subj = 0;
255 	    ap->parent = 0;
256 	    /* don't free it until group exit since we probably re-use it */
257 	}
258 	article->parent = NULL;		/* neaten up */
259 	article->sibling = NULL;
260     }
261 
262     /* If we have references, process them from the right end one at a time
263     ** until we either run into somebody, or we run out of references.
264     */
265     if (references && *references) {
266 	prev = article;
267 	ap = NULL;
268 	if ((cp = rindex(references, '<')) == NULL
269 	 || (end = index(cp+1, ' ')) == NULL)
270 	    end = references + strlen(references) - 1;
271 	while (cp) {
272 	    while (end >= cp && end > references
273 	     && (*(unsigned char*)end <= ' ' || *end == ',')) {
274 		end--;
275 	    }
276 	    if (end <= cp)
277 		break;
278 	    end[1] = '\0';
279 	    /* Quit parsing references if this one is garbage. */
280 	    if (!(end = valid_message_id(cp, end)))
281 		break;
282 	    /* Dump all domains that end in '.', such as "..." & "1@DEL." */
283 	    if (end[-1] == '.')
284 		break;
285 	    ap = get_article(cp);
286 	    *cp = '\0';
287 	    chain_autofl |= ap->autofl;
288 	    if (ap->subj == article->subj)
289 		subj_autofl |= ap->autofl;
290 
291 	    /* Check for duplicates on the reference line.  Brand-new data has
292 	    ** no date.  Data we just allocated earlier on this line has a
293 	    ** date but no subj.  Special-case the article itself, since it
294 	    ** does have a subj.
295 	    */
296 	    if ((ap->date && !ap->subj) || ap == article) {
297 		if ((ap = prev) == article)
298 		    ap = NULL;
299 		goto next;
300 	    }
301 
302 	    /* When we're doing late processing of In-Reply-To: lines, we may
303 	    ** have to move an article from an old position.
304 	    */
305 	    if (rethreading && prev->subj)
306 		unlink_child(prev);
307 	    prev->parent = ap;
308 	    link_child(prev);
309 	    if (ap->subj)
310 		break;
311 
312 	    ap->date = article->date;
313 	    prev = ap;
314 	  next:
315 	    if (cp > references)
316 	        end = cp-1;
317 	    else
318 	        end = cp;
319 	    cp = rindex(references, '<');
320 	}
321 	if (!ap)
322 	    goto no_references;
323 
324 	/* Check if we ran into anybody that was already linked.  If so, we
325 	** just use their thread.
326 	*/
327 	if (ap->subj) {
328 	    /* See if this article spans the gap between what we thought
329 	    ** were two different threads.
330 	    */
331 	    if (article->subj->thread != ap->subj->thread)
332 		merge_threads(ap->subj, article->subj);
333 	} else {
334 	    /* We didn't find anybody we knew, so either create a new thread
335 	    ** or use the article's thread if it was previously faked.
336 	    */
337 	    ap->subj = article->subj;
338 	    link_child(ap);
339 	}
340 	/* Set the subj of faked articles we created as references. */
341 	for (ap = article->parent; ap && !ap->subj; ap = ap->parent)
342 	    ap->subj = article->subj;
343 
344 	/* Make sure we didn't circularly link to a child article(!), by
345 	** ensuring that we run off the top before we run into ourself.
346 	*/
347 	while (ap && ap->parent != article)
348 	    ap = ap->parent;
349 	if (ap) {
350 	    /* Ugh.  Someone's tweaked reference line with an incorrect
351 	    ** article-order arrived first, and one of our children is
352 	    ** really one of our ancestors. Cut off the bogus child branch
353 	    ** right where we are and link it to the thread.
354 	    */
355 	    unlink_child(ap);
356 	    ap->parent = NULL;
357 	    link_child(ap);
358 	}
359     } else {
360       no_references:
361 	/* The article has no references.  Either turn it into a new thread
362 	** or re-attach the fleshed-out article to its old thread.  Don't
363 	** touch it at all unless this is the first attempt at threading it.
364 	*/
365 	if (!rethreading)
366 	    link_child(article);
367     }
368     if (!(article->flags & AF_CACHED))
369 	cache_article(article);
370     thread_autofl = chain_autofl;
371     if (sel_mode == SM_THREAD) {
372 	SUBJECT* sp = article->subj->thread_link;
373 	while (sp != article->subj) {
374 	    if (sp->articles)
375 		thread_autofl |= sp->articles->autofl;
376 	    sp = sp->thread_link;
377 	}
378     }
379     subj_autofl |= article->subj->articles->autofl;
380 
381     perform_auto_flags(article, thread_autofl, subj_autofl, chain_autofl);
382 }
383 
384 void
rover_thread(article,s)385 rover_thread(article, s)
386 ARTICLE* article;
387 char* s;
388 {
389     ARTICLE* prev = article;
390     char* end;
391     char ch;
392 
393     for (;;) {
394 	while (*++s == ' ') ;
395 	if (isdigit(*s)) {
396 	    article = article_ptr(atol(s));
397 	    prev->parent = article;
398 	    link_child(prev);
399 	    break;
400 	}
401 	end = index(s, '>');
402 	if (!end)
403 	    return;				/* Impossible! */
404 	ch = end[1];
405 	end[1] = '\0';
406 	article = get_article(s);
407 	prev->parent = article;
408 	link_child(prev);
409 	if (!ch)
410 	    break;
411 	end[1] = ch;
412 	s = end;
413     }
414 }
415 
/* Decide whether the text from start through end (both inclusive) has the
** shape "<...@...>" with at least one character on each side of the '@'.
**
** Returns the position of the closing '>' when it does, 0 when it does
** not or when start == end.  The text may be repaired in place first:
**  - a final character that is one of  < - ! % ) | : } * + # ] @ $  is
**    overwritten with '>' (for posters whose substitution of '>' with
**    another citation character also hit the header);
**  - of two '>' at the end, the last is overwritten with NUL and the
**    returned pointer is one lower than the end passed in.
** The '@' search runs to the terminating NUL, so the caller presumably
** terminates the string right after end.
*/
static char*
valid_message_id(start, end)
register char* start;
register char* end;
{
    char* at_sign;

    if (start == end)
        return 0;

    switch (*end) {
    case '>':
        if (end[-1] == '>')
            *end-- = '\0';
        break;
    case '<': case '-': case '!': case '%':
    case ')': case '|': case ':': case '}':
    case '*': case '+': case '#': case ']':
    case '@': case '$':
        *end = '>';
        break;
    default:
        break;
    }

    /* Id must be "<...@...>" */
    if (*start != '<' || *end != '>')
        return 0;
    at_sign = index(start, '@');
    if (at_sign == NULL || at_sign == start+1 || at_sign+1 == end)
        return 0;
    return end;
}
448 
449 /* Remove an article from its parent/siblings.  Leave parent pointer intact.
450 */
451 static void
unlink_child(child)452 unlink_child(child)
453 register ARTICLE* child;
454 {
455     register ARTICLE* last;
456 
457     if (!(last = child->parent)) {
458 	register SUBJECT* sp = child->subj;
459 	if ((last = sp->thread) == child) {
460 	    do {
461 		sp->thread = child->sibling;
462 		sp = sp->thread_link;
463 	    } while (sp != child->subj);
464 	} else
465 	    goto sibling_search;
466     } else {
467 	if (last->child1 == child)
468 	    last->child1 = child->sibling;
469 	else {
470 	    last = last->child1;
471 	  sibling_search:
472 	    while (last && last->sibling != child)
473 		last = last->sibling;
474 	    if (last)
475 		last->sibling = child->sibling;
476 	}
477     }
478 }
479 
480 /* Link an article to its parent article.  If its parent pointer is zero,
481 ** link it to its thread.  Sorts siblings by date.
482 */
483 void
link_child(child)484 link_child(child)
485 register ARTICLE* child;
486 {
487     register ARTICLE* ap;
488 
489     if (!(ap = child->parent)) {
490 	register SUBJECT* sp = child->subj;
491 	ap = sp->thread;
492 	if (!ap || child->date < ap->date) {
493 	    do {
494 		sp->thread = child;
495 		sp = sp->thread_link;
496 	    } while (sp != child->subj);
497 	    child->sibling = ap;
498 	} else
499 	    goto sibling_search;
500     } else {
501 	ap = ap->child1;
502 	if (!ap || child->date < ap->date) {
503 	    child->sibling = ap;
504 	    child->parent->child1 = child;
505 	} else {
506 	  sibling_search:
507 	    while (ap->sibling && ap->sibling->date <= child->date)
508 		ap = ap->sibling;
509 	    child->sibling = ap->sibling;
510 	    ap->sibling = child;
511 	}
512     }
513 }
514 
515 /* Merge all of s2's thread into s1's thread.
516 */
517 void
merge_threads(s1,s2)518 merge_threads(s1, s2)
519 SUBJECT* s1;
520 SUBJECT* s2;
521 {
522     register SUBJECT* sp;
523     register ARTICLE* t1;
524     register ARTICLE* t2;
525 
526     t1 = s1->thread;
527     t2 = s2->thread;
528     /* Change all of t2's thread pointers to a common lead article */
529     sp = s2;
530     do {
531 	sp->thread = t1;
532 	sp = sp->thread_link;
533     } while (sp != s2);
534 
535     /* Join the two circular lists together */
536     sp = s2->thread_link;
537     s2->thread_link = s1->thread_link;
538     s1->thread_link = sp;
539 
540     /* If thread mode is set, ensure the subjects are adjacent in the list. */
541     /* Don't do this if the selector is active, because it gets messed up. */
542     if (sel_mode == SM_THREAD && gmode != 's') {
543 	for (sp = s2; sp->prev && sp->prev->thread == t1; ) {
544 	    sp = sp->prev;
545 	    if (sp == s1)
546 		goto artlink;
547 	}
548 	while (s2->next && s2->next->thread == t1) {
549 	    s2 = s2->next;
550 	    if (s2 == s1)
551 		goto artlink;
552 	}
553 	/* Unlink the s2 chunk of subjects from the list */
554 	if (!sp->prev)
555 	    first_subject = s2->next;
556 	else
557 	    sp->prev->next = s2->next;
558 	if (!s2->next)
559 	    last_subject = sp->prev;
560 	else
561 	    s2->next->prev = sp->prev;
562 	/* Link the s2 chunk after s1 */
563 	sp->prev = s1;
564 	s2->next = s1->next;
565 	if (!s1->next)
566 	    last_subject = s2;
567 	else
568 	    s1->next->prev = s2;
569 	s1->next = sp;
570     }
571 
572   artlink:
573     /* Link each article that was attached to t2 to t1. */
574     for (t1 = t2; t1; t1 = t2) {
575 	t2 = t2->sibling;
576 	link_child(t1);      /* parent is null, thread is newly set */
577     }
578 }
579