1 /*
2  *	(c) Copyright 1990, Kim Fabricius Storm.  All rights reserved.
3  *      Copyright (c) 1996-2005 Michael T Pins.  All rights reserved.
4  *
5  *	Article sorting.
6  */
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include "config.h"
12 #include "global.h"
13 #include "articles.h"
14 
15 /* sort.c */
16 
17 static int      order_subj_date(article_header ** ah1, article_header ** ah2);
18 static int      order_date_subj_date(article_header ** ah1, article_header ** ah2);
19 static int      order_arrival(article_header ** a, article_header ** b);
20 static int      order_date(register article_header ** ah1, register article_header ** ah2);
21 static int      order_from_date(register article_header ** ah1, register article_header ** ah2);
22 static flag_type article_equal(article_header ** ah1, article_header ** ah2);
23 
24 static int      order_arrival();
25 
26 #ifdef BAD_ORDER_SUBJ_DATE
27 /* If one article's subject is identical to the first part of another
28  article, the two subjects will still be considered identical if the
29  length of the shorter subject is at least the limit set by this variable. */
30 int             subject_match_limit = 20;	/* "strncmp" limit for
31 						 * subjects */
32 #else
33 /* Subjects are considered identical if their first s-m-l characters match */
34 int             subject_match_limit = 256;	/* "strncmp" limit for
35 						 * subjects */
36 #endif
37 
38 int             match_skip_prefix = 0;	/* skip first N bytes in matches */
39 int             match_parts_equal = 0;	/* match digits as equal */
40 int             match_parts_begin = 4;	/* require N characters for part
41 					 * match */
42 
43 int             sort_mode = 1;	/* default sort mode */
44 
45 extern int      bypass_consolidation;
46 
47 /*
48  *	String matching macroes.
49  *
50  * 	MATCH_DROP(t, a) and MATCH_DROP(t, b) must both be proven false
51  *	before MATCH_?? (t, a, b) is used.
52  */
53 
54 #ifdef HAVE_WORKING_COLLATE
55 
56 #ifdef HAVE_8BIT_CTYPE
57 #define MATCH_DROP(table, c) !isprint(c)
58 #else
59 #define MATCH_DROP(table, c) ( c & 0200 || !isprint(c) )
60 #endif
61 #define MATCH_EQ(table, a, b) ( a == b || table(a, b) == 0 )
62 #define MATCH_LS_EQ(table, a, b) ( a == b || table(a, b) <= 0 )
63 #define MATCH_LS(table, a, b) ( table(a, b) < 0 )
64 #define MATCH_CMP(table, a, b) table(a, b)
65 
match_subject(a,b)66 static int match_subject(a, b)
67 char a, b;
68 {
69 	static char aa[2], bb[2];
70 
71 	aa[0] = a; bb[0] = b;
72 	return strcoll(aa, bb);
73 }
74 
75 #else
76 
77 #define	MATCH_DROP(table, c) ( c & 0200 || table[c] == 0 )
78 #define MATCH_EQ(table, a, b) ( a == b || table[a] == table[b] )
79 #define MATCH_LS_EQ(table, a, b) ( a <= b || table[a] <= table[b] )
80 #define MATCH_LS(table, a, b) ( a < b || table[a] < table[b] )
81 #define	MATCH_CMP(table, a, b) (table[a] - table[b])
82 
83 static char     match_subject[128] = {
84 
85 /*  NUL SOH STX ETX EOT ENQ ACK BEL BS  TAB NL  VT  FF  CR  SO  SI  */
86     00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00,
87 
88 /*  DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM  SUB ESC FS  GS  RS  US  */
89     00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00,
90 
91 /*  SP  !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   */
92     00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 99, 00, 00, 00, 00,
93 /*                                              ^^                  */
94 
95 /*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?   */
96     1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 00, 00, 00, 00, 00, 00,
97 
98 /*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   */
99     00, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
100 
101 /*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _   */
102     26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 00, 00,
103 
104 /*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   */
105     00, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
106 
107 /*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~   DEL */
108     26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 00, 00
109 };
110 
111 #endif /* HAVE_WORKING_COLLATE */
112 
113 static int
order_subj_date(article_header ** ah1,article_header ** ah2)114 order_subj_date(article_header ** ah1, article_header ** ah2)
115 {
116     register char  *a = (**ah1).subject, *b = (**ah2).subject;
117     register char   ca, cb;
118     register int    p, len;
119 
120     if (match_skip_prefix) {
121 	if ((int) (**ah1).subj_length > match_skip_prefix) {
122 	    if ((int) (**ah2).subj_length > match_skip_prefix) {
123 		a += match_skip_prefix;
124 		b += match_skip_prefix;
125 	    } else
126 		return 1;
127 	} else {
128 	    if ((int) (**ah2).subj_length > match_skip_prefix) {
129 		return -1;
130 	    }
131 	}
132     }
133 
134 #ifdef BAD_ORDER_SUBJ_DATE
135     for (len = 0;; len++, a++, b++) {
136 #else
137     for (len = subject_match_limit; --len >= 0; a++, b++) {
138 #endif
139 
140 	while ((ca = *a) && MATCH_DROP(match_subject, ((int8) ca)))
141 	    a++;
142 	while ((cb = *b) && MATCH_DROP(match_subject, ((int8) cb)))
143 	    b++;
144 
145 	if (ca == NUL) {
146 	    if (cb == NUL)
147 		break;
148 
149 #ifdef BAD_ORDER_SUBJ_DATE
150 	    if (len >= subject_match_limit)
151 		break;
152 #endif
153 
154 	    return -1;
155 	}
156 	if (cb == NUL) {
157 
158 #ifdef BAD_ORDER_SUBJ_DATE
159 	    if (len >= subject_match_limit)
160 		break;
161 #endif
162 
163 	    return 1;
164 	}
165 	if ((p = MATCH_CMP(match_subject, (int8) ca, (int8) cb)))
166 	    return p;
167     }
168 
169     if ((**ah1).t_stamp > (**ah2).t_stamp)
170 	return 1;
171     if ((**ah1).t_stamp < (**ah2).t_stamp)
172 	return -1;
173     return order_arrival(ah1, ah2);
174 }
175 
176 /* data_subj_date can only be used after root_t_stamp is set */
177 
178 static int
179 order_date_subj_date(article_header ** ah1, article_header ** ah2)
180 {
181     register time_stamp t1 = (**ah1).root_t_stamp;
182     register time_stamp t2 = (**ah2).root_t_stamp;
183 
184     if (t1 > t2)
185 	return 1;
186     if (t1 < t2)
187 	return -1;
188     return order_subj_date(ah1, ah2);	/* get original order */
189 }
190 
191 
192 static int
193 order_arrival(article_header ** a, article_header ** b)
194 {
195     register long   i;
196 
197     if ((i = ((*a)->a_number - (*b)->a_number)) == 0)
198 	i = (*a)->fpos - (*b)->fpos;
199 
200     return (i > 0) ? 1 : (i < 0) ? -1 : 0;
201 }
202 
203 static int
204 order_date(article_header ** ah1, article_header ** ah2)
205 {
206     if ((**ah1).t_stamp > (**ah2).t_stamp)
207 	return 1;
208     if ((**ah1).t_stamp < (**ah2).t_stamp)
209 	return -1;
210     return order_arrival(ah1, ah2);
211 }
212 
213 static int
214 order_from_date(register article_header ** ah1, register article_header ** ah2)
215 {
216     register int    i;
217 
218     if ((i = strcmp((**ah1).sender, (**ah2).sender)))
219 	return i;
220     return order_date(ah1, ah2);
221 }
222 
223 static          flag_type
224 article_equal(article_header ** ah1, article_header ** ah2)
225  /* ah1.hdr == ah2.hdr */
226 {
227     register char  *a = (**ah1).subject, *b = (**ah2).subject;
228     register int    len;
229 
230     if (match_skip_prefix) {
231 	if ((int) (**ah1).subj_length > match_skip_prefix) {
232 	    if ((int) (**ah2).subj_length > match_skip_prefix) {
233 		a += match_skip_prefix;
234 		b += match_skip_prefix;
235 	    }
236 	}
237     }
238     for (len = 0;; len++, a++, b++) {
239 	while (*a && MATCH_DROP(match_subject, (int8) * a))
240 	    a++;
241 	while (*b && MATCH_DROP(match_subject, (int8) * b))
242 	    b++;
243 
244 	if (*a == NUL) {
245 	    if (*b == NUL)
246 		break;
247 	    if (len >= subject_match_limit)
248 		return A_ALMOST_SAME;
249 	    return 0;
250 	}
251 	if (*b == NUL) {
252 	    if (len >= subject_match_limit)
253 		return A_ALMOST_SAME;
254 	    return 0;
255 	}
256 	if (!MATCH_EQ(match_subject, (int8) * a, (int8) * b)) {
257 	    if (len >= subject_match_limit)
258 		return A_ALMOST_SAME;
259 	    if (len >= match_parts_begin && match_parts_equal &&
260 		isdigit(*a) && isdigit(*b))
261 		return A_ALMOST_SAME;
262 	    return 0;
263 	}
264     }
265 
266     return A_SAME;
267 }
268 
269 void
270 sort_articles(int mode)
271 {
272     register article_header **app;
273     register long   n;
274     register flag_type same;
275     fct_type        cmp;
276 
277     if (n_articles <= 0)
278 	return;
279 
280     for (n = n_articles; --n >= 0;)
281 	articles[n]->flag &= ~(A_SAME | A_ALMOST_SAME | A_NEXT_SAME | A_ROOT_ART);
282 
283     if (n_articles == 1) {
284 	articles[0]->flag |= A_ROOT_ART;
285 	return;
286     }
287     if (mode == -1)
288 	mode = sort_mode;
289 
290     switch (mode) {
291 	case -2:		/* restore original ordering for update */
292 	    cmp = order_arrival;
293 	    break;
294 	default:
295 	case 0:		/* arrival (no sort) */
296 	    cmp = order_arrival;
297 	    bypass_consolidation = 1;
298 	    break;
299 	case 1:		/* date-subject-date */
300 	case 2:		/* subject-date */
301 	    cmp = order_subj_date;
302 	    break;
303 	case 3:		/* date only */
304 	    cmp = order_date;
305 	    bypass_consolidation = 1;
306 	    break;
307 	case 4:		/* sender-date */
308 	    cmp = order_from_date;
309 	    bypass_consolidation = 1;
310 	    break;
311     }
312 
313     quicksort(articles, n_articles, article_header *, cmp);
314 
315     articles[0]->root_t_stamp = articles[0]->t_stamp;
316     articles[0]->flag |= A_ROOT_ART;
317     for (n = n_articles - 1, app = articles + 1; --n >= 0; app++) {
318 	if ((same = article_equal(app, app - 1))) {
319 	    app[0]->root_t_stamp = app[-1]->root_t_stamp;
320 	    app[0]->flag |= same;
321 	    app[-1]->flag |= A_NEXT_SAME;
322 	} else {
323 	    app[0]->root_t_stamp = app[0]->t_stamp;
324 	    app[0]->flag |= A_ROOT_ART;
325 	}
326     }
327 
328     if (mode == 1)
329 	quicksort(articles, n_articles, article_header *, order_date_subj_date);
330 }
331 
332 /*
333  *	If articles are not sorted via sort_articles, they must still be
334  *	marked with proper attributes (e.g. A_ROOT_ART) via no_sort_articles.
335  */
336 
337 void
338 no_sort_articles(void)
339 {
340     register article_number n;
341     register article_header *ah;
342 
343     for (n = n_articles; --n >= 0;) {
344 	ah = articles[n];
345 	ah->flag &= ~(A_SAME | A_ALMOST_SAME | A_NEXT_SAME | A_ROOT_ART);
346 	ah->flag |= A_ROOT_ART;
347     }
348     bypass_consolidation = 1;
349 }
350 
351 /*
352  * Eliminate articles with the A_KILL flag set preserving the present ordering.
353  * This will only release the last entries in the articles array.
354  * Neither strings nor articles headers are released.
355  */
356 
357 int
358 elim_articles(register article_number * list, int list_lgt)
359 {
360     register article_header **srca, **desta;
361     register article_number n, count;
362     register flag_type same;
363     int             changed, llen;
364 
365     count = 0;
366     changed = 0, llen = 0;
367     for (n = 0, srca = desta = articles; n < n_articles; n++, srca++) {
368 	if ((*srca)->attr == A_KILL) {
369 	    if (list_lgt > 0) {
370 		if (n < *list) {
371 		    if (llen)
372 			changed = 1;
373 		} else if (n == *list) {
374 		    if (llen) {
375 			llen++;
376 			list_lgt--;
377 			*list++ = -1;
378 		    } else
379 			++(*list);
380 		    changed = 1;
381 		}
382 	    }
383 	    continue;
384 	}
385 	if (list_lgt > 0 && n == *list) {
386 	    *list++ = count;
387 	    list_lgt--;
388 	    llen++;
389 	}
390 	count++;
391 	*desta++ = *srca;
392     }
393     if (list_lgt > 0) {
394 	if (!llen)
395 	    *list = 0;
396 	changed = 1;
397     }
398     n_articles = count;
399 
400     if (changed && n_articles > 0) {
401 	srca = articles;
402 	srca[0]->flag &= ~(A_SAME | A_ALMOST_SAME | A_NEXT_SAME);
403 	srca[0]->flag |= A_ROOT_ART;
404 	for (n = n_articles - 1, srca++; --n >= 0; srca++) {
405 	    srca[0]->flag &= ~(A_SAME | A_ALMOST_SAME | A_NEXT_SAME | A_ROOT_ART);
406 	    if ((same = article_equal(srca, srca - 1))) {
407 		srca[0]->flag |= same;
408 		srca[-1]->flag |= A_NEXT_SAME;
409 	    } else
410 		srca[0]->flag |= A_ROOT_ART;
411 	}
412     }
413     return changed;
414 }
415