1 /*
2 * (c) Copyright 1990, Kim Fabricius Storm. All rights reserved.
3 * Copyright (c) 1996-2005 Michael T Pins. All rights reserved.
4 *
5 * Article sorting.
6 */
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include "config.h"
12 #include "global.h"
13 #include "articles.h"
14
15 /* sort.c */
16
17 static int order_subj_date(article_header ** ah1, article_header ** ah2);
18 static int order_date_subj_date(article_header ** ah1, article_header ** ah2);
19 static int order_arrival(article_header ** a, article_header ** b);
20 static int order_date(register article_header ** ah1, register article_header ** ah2);
21 static int order_from_date(register article_header ** ah1, register article_header ** ah2);
22 static flag_type article_equal(article_header ** ah1, article_header ** ah2);
23
24 static int order_arrival();
25
26 #ifdef BAD_ORDER_SUBJ_DATE
27 /* If one article's subject is identical to the first part of another
28 article, the two subjects will still be considered identical if the
29 length of the shorter subject is at least the limit set by this variable. */
30 int subject_match_limit = 20; /* "strncmp" limit for
31 * subjects */
32 #else
33 /* Subjects are considered identical if their first s-m-l characters match */
34 int subject_match_limit = 256; /* "strncmp" limit for
35 * subjects */
36 #endif
37
38 int match_skip_prefix = 0; /* skip first N bytes in matches */
39 int match_parts_equal = 0; /* match digits as equal */
40 int match_parts_begin = 4; /* require N characters for part
41 * match */
42
43 int sort_mode = 1; /* default sort mode */
44
45 extern int bypass_consolidation;
46
47 /*
48 * String matching macroes.
49 *
50 * MATCH_DROP(t, a) and MATCH_DROP(t, b) must both be proven false
51 * before MATCH_?? (t, a, b) is used.
52 */
53
54 #ifdef HAVE_WORKING_COLLATE
55
56 #ifdef HAVE_8BIT_CTYPE
57 #define MATCH_DROP(table, c) !isprint(c)
58 #else
59 #define MATCH_DROP(table, c) ( c & 0200 || !isprint(c) )
60 #endif
61 #define MATCH_EQ(table, a, b) ( a == b || table(a, b) == 0 )
62 #define MATCH_LS_EQ(table, a, b) ( a == b || table(a, b) <= 0 )
63 #define MATCH_LS(table, a, b) ( table(a, b) < 0 )
64 #define MATCH_CMP(table, a, b) table(a, b)
65
match_subject(a,b)66 static int match_subject(a, b)
67 char a, b;
68 {
69 static char aa[2], bb[2];
70
71 aa[0] = a; bb[0] = b;
72 return strcoll(aa, bb);
73 }
74
75 #else
76
77 #define MATCH_DROP(table, c) ( c & 0200 || table[c] == 0 )
78 #define MATCH_EQ(table, a, b) ( a == b || table[a] == table[b] )
79 #define MATCH_LS_EQ(table, a, b) ( a <= b || table[a] <= table[b] )
80 #define MATCH_LS(table, a, b) ( a < b || table[a] < table[b] )
81 #define MATCH_CMP(table, a, b) (table[a] - table[b])
82
83 static char match_subject[128] = {
84
85 /* NUL SOH STX ETX EOT ENQ ACK BEL BS TAB NL VT FF CR SO SI */
86 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00,
87
88 /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */
89 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00,
90
91 /* SP ! " # $ % & ' ( ) * + , - . / */
92 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 99, 00, 00, 00, 00,
93 /* ^^ */
94
95 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
96 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 00, 00, 00, 00, 00, 00,
97
98 /* @ A B C D E F G H I J K L M N O */
99 00, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
100
101 /* P Q R S T U V W X Y Z [ \ ] ^ _ */
102 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 00, 00,
103
104 /* ` a b c d e f g h i j k l m n o */
105 00, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
106
107 /* p q r s t u v w x y z { | } ~ DEL */
108 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 00, 00
109 };
110
111 #endif /* HAVE_WORKING_COLLATE */
112
113 static int
order_subj_date(article_header ** ah1,article_header ** ah2)114 order_subj_date(article_header ** ah1, article_header ** ah2)
115 {
116 register char *a = (**ah1).subject, *b = (**ah2).subject;
117 register char ca, cb;
118 register int p, len;
119
120 if (match_skip_prefix) {
121 if ((int) (**ah1).subj_length > match_skip_prefix) {
122 if ((int) (**ah2).subj_length > match_skip_prefix) {
123 a += match_skip_prefix;
124 b += match_skip_prefix;
125 } else
126 return 1;
127 } else {
128 if ((int) (**ah2).subj_length > match_skip_prefix) {
129 return -1;
130 }
131 }
132 }
133
134 #ifdef BAD_ORDER_SUBJ_DATE
135 for (len = 0;; len++, a++, b++) {
136 #else
137 for (len = subject_match_limit; --len >= 0; a++, b++) {
138 #endif
139
140 while ((ca = *a) && MATCH_DROP(match_subject, ((int8) ca)))
141 a++;
142 while ((cb = *b) && MATCH_DROP(match_subject, ((int8) cb)))
143 b++;
144
145 if (ca == NUL) {
146 if (cb == NUL)
147 break;
148
149 #ifdef BAD_ORDER_SUBJ_DATE
150 if (len >= subject_match_limit)
151 break;
152 #endif
153
154 return -1;
155 }
156 if (cb == NUL) {
157
158 #ifdef BAD_ORDER_SUBJ_DATE
159 if (len >= subject_match_limit)
160 break;
161 #endif
162
163 return 1;
164 }
165 if ((p = MATCH_CMP(match_subject, (int8) ca, (int8) cb)))
166 return p;
167 }
168
169 if ((**ah1).t_stamp > (**ah2).t_stamp)
170 return 1;
171 if ((**ah1).t_stamp < (**ah2).t_stamp)
172 return -1;
173 return order_arrival(ah1, ah2);
174 }
175
176 /* data_subj_date can only be used after root_t_stamp is set */
177
178 static int
179 order_date_subj_date(article_header ** ah1, article_header ** ah2)
180 {
181 register time_stamp t1 = (**ah1).root_t_stamp;
182 register time_stamp t2 = (**ah2).root_t_stamp;
183
184 if (t1 > t2)
185 return 1;
186 if (t1 < t2)
187 return -1;
188 return order_subj_date(ah1, ah2); /* get original order */
189 }
190
191
192 static int
193 order_arrival(article_header ** a, article_header ** b)
194 {
195 register long i;
196
197 if ((i = ((*a)->a_number - (*b)->a_number)) == 0)
198 i = (*a)->fpos - (*b)->fpos;
199
200 return (i > 0) ? 1 : (i < 0) ? -1 : 0;
201 }
202
203 static int
204 order_date(article_header ** ah1, article_header ** ah2)
205 {
206 if ((**ah1).t_stamp > (**ah2).t_stamp)
207 return 1;
208 if ((**ah1).t_stamp < (**ah2).t_stamp)
209 return -1;
210 return order_arrival(ah1, ah2);
211 }
212
213 static int
214 order_from_date(register article_header ** ah1, register article_header ** ah2)
215 {
216 register int i;
217
218 if ((i = strcmp((**ah1).sender, (**ah2).sender)))
219 return i;
220 return order_date(ah1, ah2);
221 }
222
223 static flag_type
224 article_equal(article_header ** ah1, article_header ** ah2)
225 /* ah1.hdr == ah2.hdr */
226 {
227 register char *a = (**ah1).subject, *b = (**ah2).subject;
228 register int len;
229
230 if (match_skip_prefix) {
231 if ((int) (**ah1).subj_length > match_skip_prefix) {
232 if ((int) (**ah2).subj_length > match_skip_prefix) {
233 a += match_skip_prefix;
234 b += match_skip_prefix;
235 }
236 }
237 }
238 for (len = 0;; len++, a++, b++) {
239 while (*a && MATCH_DROP(match_subject, (int8) * a))
240 a++;
241 while (*b && MATCH_DROP(match_subject, (int8) * b))
242 b++;
243
244 if (*a == NUL) {
245 if (*b == NUL)
246 break;
247 if (len >= subject_match_limit)
248 return A_ALMOST_SAME;
249 return 0;
250 }
251 if (*b == NUL) {
252 if (len >= subject_match_limit)
253 return A_ALMOST_SAME;
254 return 0;
255 }
256 if (!MATCH_EQ(match_subject, (int8) * a, (int8) * b)) {
257 if (len >= subject_match_limit)
258 return A_ALMOST_SAME;
259 if (len >= match_parts_begin && match_parts_equal &&
260 isdigit(*a) && isdigit(*b))
261 return A_ALMOST_SAME;
262 return 0;
263 }
264 }
265
266 return A_SAME;
267 }
268
269 void
270 sort_articles(int mode)
271 {
272 register article_header **app;
273 register long n;
274 register flag_type same;
275 fct_type cmp;
276
277 if (n_articles <= 0)
278 return;
279
280 for (n = n_articles; --n >= 0;)
281 articles[n]->flag &= ~(A_SAME | A_ALMOST_SAME | A_NEXT_SAME | A_ROOT_ART);
282
283 if (n_articles == 1) {
284 articles[0]->flag |= A_ROOT_ART;
285 return;
286 }
287 if (mode == -1)
288 mode = sort_mode;
289
290 switch (mode) {
291 case -2: /* restore original ordering for update */
292 cmp = order_arrival;
293 break;
294 default:
295 case 0: /* arrival (no sort) */
296 cmp = order_arrival;
297 bypass_consolidation = 1;
298 break;
299 case 1: /* date-subject-date */
300 case 2: /* subject-date */
301 cmp = order_subj_date;
302 break;
303 case 3: /* date only */
304 cmp = order_date;
305 bypass_consolidation = 1;
306 break;
307 case 4: /* sender-date */
308 cmp = order_from_date;
309 bypass_consolidation = 1;
310 break;
311 }
312
313 quicksort(articles, n_articles, article_header *, cmp);
314
315 articles[0]->root_t_stamp = articles[0]->t_stamp;
316 articles[0]->flag |= A_ROOT_ART;
317 for (n = n_articles - 1, app = articles + 1; --n >= 0; app++) {
318 if ((same = article_equal(app, app - 1))) {
319 app[0]->root_t_stamp = app[-1]->root_t_stamp;
320 app[0]->flag |= same;
321 app[-1]->flag |= A_NEXT_SAME;
322 } else {
323 app[0]->root_t_stamp = app[0]->t_stamp;
324 app[0]->flag |= A_ROOT_ART;
325 }
326 }
327
328 if (mode == 1)
329 quicksort(articles, n_articles, article_header *, order_date_subj_date);
330 }
331
332 /*
333 * If articles are not sorted via sort_articles, they must still be
334 * marked with proper attributes (e.g. A_ROOT_ART) via no_sort_articles.
335 */
336
337 void
338 no_sort_articles(void)
339 {
340 register article_number n;
341 register article_header *ah;
342
343 for (n = n_articles; --n >= 0;) {
344 ah = articles[n];
345 ah->flag &= ~(A_SAME | A_ALMOST_SAME | A_NEXT_SAME | A_ROOT_ART);
346 ah->flag |= A_ROOT_ART;
347 }
348 bypass_consolidation = 1;
349 }
350
351 /*
352 * Eliminate articles with the A_KILL flag set preserving the present ordering.
353 * This will only release the last entries in the articles array.
354 * Neither strings nor articles headers are released.
355 */
356
357 int
358 elim_articles(register article_number * list, int list_lgt)
359 {
360 register article_header **srca, **desta;
361 register article_number n, count;
362 register flag_type same;
363 int changed, llen;
364
365 count = 0;
366 changed = 0, llen = 0;
367 for (n = 0, srca = desta = articles; n < n_articles; n++, srca++) {
368 if ((*srca)->attr == A_KILL) {
369 if (list_lgt > 0) {
370 if (n < *list) {
371 if (llen)
372 changed = 1;
373 } else if (n == *list) {
374 if (llen) {
375 llen++;
376 list_lgt--;
377 *list++ = -1;
378 } else
379 ++(*list);
380 changed = 1;
381 }
382 }
383 continue;
384 }
385 if (list_lgt > 0 && n == *list) {
386 *list++ = count;
387 list_lgt--;
388 llen++;
389 }
390 count++;
391 *desta++ = *srca;
392 }
393 if (list_lgt > 0) {
394 if (!llen)
395 *list = 0;
396 changed = 1;
397 }
398 n_articles = count;
399
400 if (changed && n_articles > 0) {
401 srca = articles;
402 srca[0]->flag &= ~(A_SAME | A_ALMOST_SAME | A_NEXT_SAME);
403 srca[0]->flag |= A_ROOT_ART;
404 for (n = n_articles - 1, srca++; --n >= 0; srca++) {
405 srca[0]->flag &= ~(A_SAME | A_ALMOST_SAME | A_NEXT_SAME | A_ROOT_ART);
406 if ((same = article_equal(srca, srca - 1))) {
407 srca[0]->flag |= same;
408 srca[-1]->flag |= A_NEXT_SAME;
409 } else
410 srca[0]->flag |= A_ROOT_ART;
411 }
412 }
413 return changed;
414 }
415