1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #include "msgCore.h"  // precompiled header...
7 #include "prlog.h"
8 
9 #include "MailNewsTypes.h"
10 #include "nsMsgKeySet.h"
11 #include "prprf.h"
12 #include "prmem.h"
13 #include "nsTArray.h"
14 #include "nsMemory.h"
15 #include <ctype.h>
16 
17 #if defined(DEBUG_seth_) || defined(DEBUG_sspitzer_)
18 #  define DEBUG_MSGKEYSET 1
19 #endif
20 
21 /* A compressed encoding for sets of article.  This is usually for lines from
22    the newsrc, which have article lists like
23 
24    1-29627,29635,29658,32861-32863
25 
26    so the data has these properties:
27 
28    - strictly increasing
29    - large subsequences of monotonically increasing ranges
30    - gaps in the set are usually small, but not always
31    - consecutive ranges tend to be large
32 
33    The biggest win is to run-length encode the data, storing ranges as two
34    numbers (start+length or start,end). We could also store each number as a
35    delta from the previous number for further compression, but that gets kind
36    of tricky, since there are no guarantees about the sizes of the gaps, and
37    we'd have to store variable-length words.
38 
39    Current data format:
40 
41    DATA := SIZE [ CHUNK ]*
42    CHUNK := [ RANGE | VALUE ]
43    RANGE := -LENGTH START
44    START := VALUE
45    LENGTH := int32_t
46    VALUE := a literal positive integer, for now
47    it could also be an offset from the previous value.
48    LENGTH could also perhaps be a less-than-32-bit quantity,
49    at least most of the time.
50 
51    Lengths of CHUNKs are stored negative to distinguish the beginning of
52    a chunk from a literal: negative means two-word sequence, positive
53    means one-word sequence.
54 
55    0 represents a literal 0, but should not occur, and should never occur
56    except in the first position.
57 
58    A length of -1 won't occur either, except temporarily - a sequence of
59    two elements is represented as two literals, since they take up the same
60    space.
61 
62    Another optimization we make is to notice that we typically ask the
63    question ``is N a member of the set'' for increasing values of N. So the
64    set holds a cache of the last value asked for, and can simply resume the
65    search from there.  */
66 
nsMsgKeySet()67 nsMsgKeySet::nsMsgKeySet(/* MSG_NewsHost* host*/) {
68   MOZ_COUNT_CTOR(nsMsgKeySet);
69   m_cached_value = -1;
70   m_cached_value_index = 0;
71   m_length = 0;
72   m_data_size = 10;
73   m_data = (int32_t*)PR_Malloc(sizeof(int32_t) * m_data_size);
74 #ifdef NEWSRC_DOES_HOST_STUFF
75   m_host = host;
76 #endif
77 }
78 
~nsMsgKeySet()79 nsMsgKeySet::~nsMsgKeySet() {
80   MOZ_COUNT_DTOR(nsMsgKeySet);
81   PR_FREEIF(m_data);
82 }
83 
Grow()84 bool nsMsgKeySet::Grow() {
85   int32_t new_size;
86   int32_t* new_data;
87   new_size = m_data_size * 2;
88   new_data = (int32_t*)PR_REALLOC(m_data, sizeof(int32_t) * new_size);
89   if (!new_data) return false;
90   m_data_size = new_size;
91   m_data = new_data;
92   return true;
93 }
94 
nsMsgKeySet(const char * numbers)95 nsMsgKeySet::nsMsgKeySet(const char* numbers /* , MSG_NewsHost* host */) {
96   int32_t *head, *tail, *end;
97   MOZ_COUNT_CTOR(nsMsgKeySet);
98 
99 #ifdef NEWSRC_DOES_HOST_STUFF
100   m_host = host;
101 #endif
102   m_cached_value = -1;
103   m_cached_value_index = 0;
104   m_length = 0;
105   m_data_size = 10;
106   m_data = (int32_t*)PR_Malloc(sizeof(int32_t) * m_data_size);
107   if (!m_data) return;
108 
109   head = m_data;
110   tail = head;
111   end = head + m_data_size;
112 
113   if (!numbers) {
114     return;
115   }
116 
117   while (isspace(*numbers)) numbers++;
118   while (*numbers) {
119     int32_t from = 0;
120     int32_t to;
121 
122     if (tail >= end - 4) {
123       /* out of room! */
124       int32_t tailo = tail - head;
125       if (!Grow()) {
126         PR_FREEIF(m_data);
127         return;
128       }
129       /* data may have been relocated */
130       head = m_data;
131       tail = head + tailo;
132       end = head + m_data_size;
133     }
134 
135     while (isspace(*numbers)) numbers++;
136     if (*numbers && !isdigit(*numbers)) {
137       break; /* illegal character */
138     }
139     while (isdigit(*numbers)) {
140       from = (from * 10) + (*numbers++ - '0');
141     }
142     while (isspace(*numbers)) numbers++;
143     if (*numbers != '-') {
144       to = from;
145     } else {
146       to = 0;
147       numbers++;
148       while (*numbers >= '0' && *numbers <= '9')
149         to = (to * 10) + (*numbers++ - '0');
150       while (isspace(*numbers)) numbers++;
151     }
152 
153     if (to < from) to = from; /* illegal */
154 
155     /* This is a hack - if the newsrc file specifies a range 1-x as
156        being read, we internally pretend that article 0 is read as well.
157        (But if only 2-x are read, then 0 is not read.)  This is needed
158        because some servers think that article 0 is an article (I think)
159        but some news readers (including Netscape 1.1) choke if the .newsrc
160        file has lines beginning with 0...   ### */
161     if (from == 1) from = 0;
162 
163     if (to == from) {
164       /* Write it as a literal */
165       *tail = from;
166       tail++;
167     } else /* Write it as a range. */ {
168       *tail = -(to - from);
169       tail++;
170       *tail = from;
171       tail++;
172     }
173 
174     while (*numbers == ',' || isspace(*numbers)) {
175       numbers++;
176     }
177   }
178 
179   m_length = tail - head; /* size of data */
180 }
181 
Create()182 nsMsgKeySet* nsMsgKeySet::Create(/*MSG_NewsHost* host*/) {
183   nsMsgKeySet* set = new nsMsgKeySet(/* host */);
184   if (set && set->m_data == NULL) {
185     delete set;
186     set = NULL;
187   }
188   return set;
189 }
190 
Create(const char * value)191 nsMsgKeySet* nsMsgKeySet::Create(const char* value /* , MSG_NewsHost* host */) {
192 #ifdef DEBUG_MSGKEYSET
193   printf("create from %s\n", value);
194 #endif
195 
196   nsMsgKeySet* set = new nsMsgKeySet(value /* , host */);
197   if (set && set->m_data == NULL) {
198     delete set;
199     set = NULL;
200   }
201   return set;
202 }
203 
204 /* Returns the lowest non-member of the set greater than 0.
205  */
FirstNonMember()206 int32_t nsMsgKeySet::FirstNonMember() {
207   if (m_length <= 0) {
208     return 1;
209   } else if (m_data[0] < 0 && m_data[1] != 1 && m_data[1] != 0) {
210     /* first range not equal to 0 or 1, always return 1 */
211     return 1;
212   } else if (m_data[0] < 0) {
213     /* it's a range */
214     /* If there is a range [N-M] we can presume that M+1 is not in the
215        set. */
216     return (m_data[1] - m_data[0] + 1);
217   } else {
218     /* it's a literal */
219     if (m_data[0] == 1) {
220       /* handle "1,..." */
221       if (m_length > 1 && m_data[1] == 2) {
222         /* This is "1,2,M-N,..." or "1,2,M,..."  where M >= 4.  Note
223            that M will never be 3, because in that case we would have
224            started with a range: "1-3,..." */
225         return 3;
226       } else {
227         return 2; /* handle "1,M-N,.." or "1,M,..."
228          where M >= 3; */
229       }
230     } else if (m_data[0] == 0) {
231       /* handle "0,..." */
232       if (m_length > 1 && m_data[1] == 1) {
233         /* this is 0,1, (see above) */
234         return 2;
235       } else {
236         return 1;
237       }
238 
239     } else {
240       /* handle "M,..." where M >= 2. */
241       return 1;
242     }
243   }
244 }
245 
Output(char ** outputStr)246 nsresult nsMsgKeySet::Output(char** outputStr) {
247   NS_ENSURE_ARG(outputStr);
248   int32_t size;
249   int32_t* head;
250   int32_t* tail;
251   int32_t* end;
252   int32_t s_size;
253   char* s_head;
254   char *s, *s_end;
255   int32_t last_art = -1;
256 
257   *outputStr = nullptr;
258 
259   size = m_length;
260   head = m_data;
261   tail = head;
262   end = head + size;
263 
264   s_size = (size * 12) +
265            10;  // dmb - try to make this allocation get used at least once.
266   s_head = (char*)moz_xmalloc(s_size);
267   if (!s_head) return NS_ERROR_OUT_OF_MEMORY;
268 
269   s_head[0] = '\0';  // otherwise, s_head will contain garbage.
270   s = s_head;
271   s_end = s + s_size;
272 
273   while (tail < end) {
274     int32_t from;
275     int32_t to;
276 
277     if (s > (s_end - (12 * 2 + 10))) { /* 12 bytes for each number (enough
278                         for "2147483647" aka 2^31-1),
279                         plus 10 bytes of slop. */
280       int32_t so = s - s_head;
281       s_size += 200;
282       char* tmp = (char*)moz_xmalloc(s_size);
283       if (tmp) PL_strcpy(tmp, s_head);
284       free(s_head);
285       s_head = tmp;
286       if (!s_head) return NS_ERROR_OUT_OF_MEMORY;
287       s = s_head + so;
288       s_end = s_head + s_size;
289     }
290 
291     if (*tail < 0) {
292       /* it's a range */
293       from = tail[1];
294       to = from + (-(tail[0]));
295       tail += 2;
296     } else /* it's a literal */
297     {
298       from = *tail;
299       to = from;
300       tail++;
301     }
302     if (from == 0) {
303       from = 1; /* See 'hack' comment above  ### */
304     }
305     if (from <= last_art) from = last_art + 1;
306     if (from <= to) {
307       if (from < to) {
308         PR_snprintf(s, s_end - s, "%lu-%lu,", from, to);
309       } else {
310         PR_snprintf(s, s_end - s, "%lu,", from);
311       }
312       s += PL_strlen(s);
313       last_art = to;
314     }
315   }
316   if (last_art >= 0) {
317     s--; /* Strip off the last ',' */
318   }
319 
320   *s = 0;
321 
322   *outputStr = s_head;
323   return NS_OK;
324 }
325 
GetLastMember()326 int32_t nsMsgKeySet::GetLastMember() {
327   if (m_length > 1) {
328     int32_t nextToLast = m_data[m_length - 2];
329     if (nextToLast < 0)  // is range at end?
330     {
331       int32_t last = m_data[m_length - 1];
332       return (-nextToLast + last - 1);
333     } else  // no, so last number must be last member
334     {
335       return m_data[m_length - 1];
336     }
337   } else if (m_length == 1)
338     return m_data[0];  // must be only 1 read.
339   else
340     return 0;
341 }
342 
SetLastMember(int32_t newHighWaterMark)343 void nsMsgKeySet::SetLastMember(int32_t newHighWaterMark) {
344   if (newHighWaterMark < GetLastMember()) {
345     while (true) {
346       if (m_length > 1) {
347         int32_t nextToLast = m_data[m_length - 2];
348         int32_t curHighWater;
349         if (nextToLast < 0)  // is range at end?
350         {
351           int32_t rangeStart = m_data[m_length - 1];
352           int32_t rangeLength = -nextToLast;
353           curHighWater = (rangeLength + rangeStart - 1);
354           if (curHighWater > newHighWaterMark) {
355             if (rangeStart > newHighWaterMark) {
356               m_length -= 2;  // throw away whole range
357             } else if (rangeStart == newHighWaterMark) {
358               // turn range into single element.
359               m_data[m_length - 2] = newHighWaterMark;
360               m_length--;
361               break;
362             } else  // just shorten range
363             {
364               m_data[m_length - 2] = -(newHighWaterMark - rangeStart);
365               break;
366             }
367           } else {
368             // prevent the infinite loop
369             // see bug #13062
370             break;
371           }
372         } else if (m_data[m_length - 1] >
373                    newHighWaterMark)  // no, so last number must be last member
374         {
375           m_length--;
376         } else
377           break;
378       } else
379         break;
380     }
381     // well, the whole range is probably invalid, because the server probably
382     // re-ordered ids, but what can you do?
383 #ifdef NEWSRC_DOES_HOST_STUFF
384     if (m_host) m_host->MarkDirty();
385 #endif
386   }
387 }
388 
GetFirstMember()389 int32_t nsMsgKeySet::GetFirstMember() {
390   if (m_length > 1) {
391     int32_t first = m_data[0];
392     if (first < 0)  // is range at start?
393     {
394       int32_t second = m_data[1];
395       return (second);
396     } else  // no, so first number must be first member
397     {
398       return m_data[0];
399     }
400   } else if (m_length == 1)
401     return m_data[0];  // must be only 1 read.
402   else
403     return 0;
404 }
405 
406 /* Re-compresses a `nsMsgKeySet' object.
407 
408    The assumption is made that the `nsMsgKeySet' is syntactically correct
409    (all ranges have a length of at least 1, and all values are non-
410    decreasing) but will optimize the compression, for example, merging
411    consecutive literals or ranges into one range.
412 
413    Returns true if successful, false if there wasn't enough memory to
414    allocate scratch space.
415 
416    #### This should be changed to modify the buffer in place.
417 
418    Also note that we never call Optimize() unless we actually changed
419    something, so it's a great place to tell the MSG_NewsHost* that something
420    changed.
421    */
Optimize()422 bool nsMsgKeySet::Optimize() {
423   int32_t input_size;
424   int32_t output_size;
425   int32_t* input_tail;
426   int32_t* output_data;
427   int32_t* output_tail;
428   int32_t* input_end;
429   int32_t* output_end;
430 
431   input_size = m_length;
432   output_size = input_size + 1;
433   input_tail = m_data;
434   output_data = (int32_t*)PR_Malloc(sizeof(int32_t) * output_size);
435   if (!output_data) return false;
436 
437   output_tail = output_data;
438   input_end = input_tail + input_size;
439   output_end = output_data + output_size;
440 
441   /* We're going to modify the set, so invalidate the cache. */
442   m_cached_value = -1;
443 
444   while (input_tail < input_end) {
445     int32_t from, to;
446     bool range_p = (*input_tail < 0);
447 
448     if (range_p) {
449       /* it's a range */
450       from = input_tail[1];
451       to = from + (-(input_tail[0]));
452 
453       /* Copy it over */
454       *output_tail++ = *input_tail++;
455       *output_tail++ = *input_tail++;
456     } else {
457       /* it's a literal */
458       from = *input_tail;
459       to = from;
460 
461       /* Copy it over */
462       *output_tail++ = *input_tail++;
463     }
464     NS_ASSERTION(output_tail < output_end, "invalid end of output string");
465     if (output_tail >= output_end) {
466       PR_Free(output_data);
467       return false;
468     }
469 
470     /* As long as this chunk is followed by consecutive chunks,
471        keep extending it. */
472     while (input_tail < input_end &&
473            ((*input_tail > 0 &&        /* literal... */
474              *input_tail == to + 1) || /* ...and consecutive, or */
475             (*input_tail <= 0 &&       /* range... */
476              input_tail[1] == to + 1)) /* ...and consecutive. */
477     ) {
478       if (!range_p) {
479         /* convert the literal to a range. */
480         output_tail++;
481         output_tail[-2] = 0;
482         output_tail[-1] = from;
483         range_p = true;
484       }
485 
486       if (*input_tail > 0) { /* literal */
487         output_tail[-2]--;   /* increase length by 1 */
488         to++;
489         input_tail++;
490       } else {
491         int32_t L2 = (-*input_tail) + 1;
492         output_tail[-2] -= L2; /* increase length by N */
493         to += L2;
494         input_tail += 2;
495       }
496     }
497   }
498 
499   PR_Free(m_data);
500   m_data = output_data;
501   m_data_size = output_size;
502   m_length = output_tail - output_data;
503 
504   /* One last pass to turn [N - N+1] into [N, N+1]. */
505   output_tail = output_data;
506   output_end = output_tail + m_length;
507   while (output_tail < output_end) {
508     if (*output_tail < 0) {
509       /* it's a range */
510       if (output_tail[0] == -1) {
511         output_tail[0] = output_tail[1];
512         output_tail[1]++;
513       }
514       output_tail += 2;
515     } else {
516       /* it's a literal */
517       output_tail++;
518     }
519   }
520 
521 #ifdef NEWSRC_DOES_HOST_STUFF
522   if (m_host) m_host->MarkDirty();
523 #endif
524   return true;
525 }
526 
IsMember(int32_t number)527 bool nsMsgKeySet::IsMember(int32_t number) {
528   bool value = false;
529   int32_t size;
530   int32_t* head;
531   int32_t* tail;
532   int32_t* end;
533 
534   size = m_length;
535   head = m_data;
536   tail = head;
537   end = head + size;
538 
539   /* If there is a value cached, and that value is smaller than the
540      value we're looking for, skip forward that far. */
541   if (m_cached_value > 0 && m_cached_value < number) {
542     tail += m_cached_value_index;
543   }
544 
545   while (tail < end) {
546     if (*tail < 0) {
547       /* it's a range */
548       int32_t from = tail[1];
549       int32_t to = from + (-(tail[0]));
550       if (from > number) {
551         /* This range begins after the number - we've passed it. */
552         value = false;
553         goto DONE;
554       } else if (to >= number) {
555         /* In range. */
556         value = true;
557         goto DONE;
558       } else {
559         tail += 2;
560       }
561     } else {
562       /* it's a literal */
563       if (*tail == number) {
564         /* bang */
565         value = true;
566         goto DONE;
567       } else if (*tail > number) {
568         /* This literal is after the number - we've passed it. */
569         value = false;
570         goto DONE;
571       } else {
572         tail++;
573       }
574     }
575   }
576 
577 DONE:
578   /* Store the position of this chunk for next time. */
579   m_cached_value = number;
580   m_cached_value_index = tail - head;
581 
582   return value;
583 }
584 
Add(int32_t number)585 int nsMsgKeySet::Add(int32_t number) {
586   int32_t size;
587   int32_t* head;
588   int32_t* tail;
589   int32_t* end;
590 
591 #ifdef DEBUG_MSGKEYSET
592   printf("add %d\n", number);
593 #endif
594 
595   size = m_length;
596   head = m_data;
597   tail = head;
598   end = head + size;
599 
600   NS_ASSERTION(number >= 0, "can't have negative items");
601   if (number < 0) return 0;
602 
603   /* We're going to modify the set, so invalidate the cache. */
604   m_cached_value = -1;
605 
606   while (tail < end) {
607     if (*tail < 0) {
608       /* it's a range */
609       int32_t from = tail[1];
610       int32_t to = from + (-(tail[0]));
611 
612       if (from <= number && to >= number) {
613         /* This number is already present - we don't need to do
614            anything. */
615         return 0;
616       }
617 
618       if (to > number) {
619         /* We have found the point before which the new number
620            should be inserted. */
621         break;
622       }
623 
624       tail += 2;
625     } else {
626       /* it's a literal */
627       if (*tail == number) {
628         /* This number is already present - we don't need to do
629            anything. */
630         return 0;
631       }
632 
633       if (*tail > number) {
634         /* We have found the point before which the new number
635            should be inserted. */
636         break;
637       }
638 
639       tail++;
640     }
641   }
642 
643   /* At this point, `tail' points to a position in the set which represents
644      a value greater than `new'; or it is at `end'. In the interest of
645      avoiding massive duplication of code, simply insert a literal here and
646      then run the optimizer.
647      */
648   int mid = (tail - head);
649 
650   if (m_data_size <= m_length + 1) {
651     int endo = end - head;
652     if (!Grow()) {
653       // out of memory
654       return -1;
655     }
656     head = m_data;
657     end = head + endo;
658   }
659 
660   if (tail == end) {
661     /* at the end */
662     /* Add a literal to the end. */
663     m_data[m_length++] = number;
664   } else {
665     /* need to insert (or edit) in the middle */
666     int32_t i;
667     for (i = size; i > mid; i--) {
668       m_data[i] = m_data[i - 1];
669     }
670     m_data[i] = number;
671     m_length++;
672   }
673 
674   Optimize();
675   return 1;
676 }
677 
Remove(int32_t number)678 int nsMsgKeySet::Remove(int32_t number) {
679   int32_t size;
680   int32_t* head;
681   int32_t* tail;
682   int32_t* end;
683 #ifdef DEBUG_MSGKEYSET
684   printf("remove %d\n", number);
685 #endif
686 
687   size = m_length;
688   head = m_data;
689   tail = head;
690   end = head + size;
691 
692   // **** I am not sure this is a right thing to comment the following
693   // statements out. The reason for this is due to the implementation of
694   // offline save draft and template. We use faked UIDs (negative ids) for
695   // offline draft and template in order to distinguish them from real
696   // UID. David I need your help here. **** jt
697 
698   // PR_ASSERT(number >= 0);
699   // if (number < 0) {
700   //  return -1;
701   /// }
702 
703   /* We're going to modify the set, so invalidate the cache. */
704   m_cached_value = -1;
705 
706   while (tail < end) {
707     int32_t mid = (tail - m_data);
708 
709     if (*tail < 0) {
710       /* it's a range */
711       int32_t from = tail[1];
712       int32_t to = from + (-(tail[0]));
713 
714       if (number < from || number > to) {
715         /* Not this range */
716         tail += 2;
717         continue;
718       }
719 
720       if (to == from + 1) {
721         /* If this is a range [N - N+1] and we are removing M
722            (which must be either N or N+1) replace it with a
723            literal. This reduces the length by 1. */
724         m_data[mid] = (number == from ? to : from);
725         while (++mid < m_length) {
726           m_data[mid] = m_data[mid + 1];
727         }
728         m_length--;
729         Optimize();
730         return 1;
731       } else if (to == from + 2) {
732         /* If this is a range [N - N+2] and we are removing M,
733            replace it with the literals L,M (that is, either
734            (N, N+1), (N, N+2), or (N+1, N+2). The overall
735            length remains the same. */
736         m_data[mid] = from;
737         m_data[mid + 1] = to;
738         if (from == number) {
739           m_data[mid] = from + 1;
740         } else if (to == number) {
741           m_data[mid + 1] = to - 1;
742         }
743         Optimize();
744         return 1;
745       } else if (from == number) {
746         /* This number is at the beginning of a long range (meaning a
747            range which will still be long enough to remain a range.)
748            Increase start and reduce length of the range. */
749         m_data[mid]++;
750         m_data[mid + 1]++;
751         Optimize();
752         return 1;
753       } else if (to == number) {
754         /* This number is at the end of a long range (meaning a range
755            which will still be long enough to remain a range.)
756            Just decrease the length of the range. */
757         m_data[mid]++;
758         Optimize();
759         return 1;
760       } else {
761         /* The number being deleted is in the middle of a range which
762            must be split. This increases overall length by 2.
763            */
764         int32_t i;
765         int endo = end - head;
766         if (m_data_size - m_length <= 2) {
767           if (!Grow())
768             // out of memory
769             return -1;
770         }
771         head = m_data;
772         end = head + endo;
773 
774         for (i = m_length + 2; i > mid + 2; i--) {
775           m_data[i] = m_data[i - 2];
776         }
777 
778         m_data[mid] = (-(number - from - 1));
779         m_data[mid + 1] = from;
780         m_data[mid + 2] = (-(to - number - 1));
781         m_data[mid + 3] = number + 1;
782         m_length += 2;
783 
784         /* Oops, if we've ended up with a range with a 0 length,
785            which is illegal, convert it to a literal, which reduces
786            the overall length by 1. */
787         if (m_data[mid] == 0) {
788           /* first range */
789           m_data[mid] = m_data[mid + 1];
790           for (i = mid + 1; i < m_length; i++) {
791             m_data[i] = m_data[i + 1];
792           }
793           m_length--;
794         }
795         if (m_data[mid + 2] == 0) {
796           /* second range */
797           m_data[mid + 2] = m_data[mid + 3];
798           for (i = mid + 3; i < m_length; i++) {
799             m_data[i] = m_data[i + 1];
800           }
801           m_length--;
802         }
803         Optimize();
804         return 1;
805       }
806     } else {
807       /* it's a literal */
808       if (*tail != number) {
809         /* Not this literal */
810         tail++;
811         continue;
812       }
813 
814       /* Excise this literal. */
815       m_length--;
816       while (mid < m_length) {
817         m_data[mid] = m_data[mid + 1];
818         mid++;
819       }
820       Optimize();
821       return 1;
822     }
823   }
824 
825   /* It wasn't here at all. */
826   return 0;
827 }
828 
msg_emit_range(int32_t * tmp,int32_t a,int32_t b)829 static int32_t* msg_emit_range(int32_t* tmp, int32_t a, int32_t b) {
830   if (a == b) {
831     *tmp++ = a;
832   } else {
833     NS_ASSERTION(a < b && a >= 0, "range is out of order");
834     *tmp++ = -(b - a);
835     *tmp++ = a;
836   }
837   return tmp;
838 }
839 
AddRange(int32_t start,int32_t end)840 int nsMsgKeySet::AddRange(int32_t start, int32_t end) {
841   int32_t tmplength;
842   int32_t* tmp;
843   int32_t* in;
844   int32_t* out;
845   int32_t* tail;
846   int32_t a;
847   int32_t b;
848   bool didit = false;
849 
850   /* We're going to modify the set, so invalidate the cache. */
851   m_cached_value = -1;
852 
853   NS_ASSERTION(start <= end, "invalid range");
854   if (start > end) return -1;
855 
856   if (start == end) {
857     return Add(start);
858   }
859 
860   tmplength = m_length + 2;
861   tmp = (int32_t*)PR_Malloc(sizeof(int32_t) * tmplength);
862 
863   if (!tmp)
864     // out of memory
865     return -1;
866 
867   in = m_data;
868   out = tmp;
869   tail = in + m_length;
870 
871 #define EMIT(x, y) out = msg_emit_range(out, x, y)
872 
873   while (in < tail) {
874     // Set [a,b] to be this range.
875     if (*in < 0) {
876       b = -*in++;
877       a = *in++;
878       b += a;
879     } else {
880       a = b = *in++;
881     }
882 
883     if (a <= start && b >= end) {
884       // We already have the entire range marked.
885       PR_Free(tmp);
886       return 0;
887     }
888     if (start > b + 1) {
889       // No overlap yet.
890       EMIT(a, b);
891     } else if (end < a - 1) {
892       // No overlap, and we passed it.
893       EMIT(start, end);
894       EMIT(a, b);
895       didit = true;
896       break;
897     } else {
898       // The ranges overlap.  Suck this range into our new range, and
899       // keep looking for other ranges that might overlap.
900       start = start < a ? start : a;
901       end = end > b ? end : b;
902     }
903   }
904   if (!didit) EMIT(start, end);
905   while (in < tail) {
906     *out++ = *in++;
907   }
908 
909 #undef EMIT
910 
911   PR_Free(m_data);
912   m_data = tmp;
913   m_length = out - tmp;
914   m_data_size = tmplength;
915 #ifdef NEWSRC_DOES_HOST_STUFF
916   if (m_host) m_host->MarkDirty();
917 #endif
918   return 1;
919 }
920 
CountMissingInRange(int32_t range_start,int32_t range_end)921 int32_t nsMsgKeySet::CountMissingInRange(int32_t range_start,
922                                          int32_t range_end) {
923   int32_t count;
924   int32_t* head;
925   int32_t* tail;
926   int32_t* end;
927 
928   NS_ASSERTION(range_start >= 0 && range_end >= 0 && range_end >= range_start,
929                "invalid range");
930   if (range_start < 0 || range_end < 0 || range_end < range_start) return -1;
931 
932   head = m_data;
933   tail = head;
934   end = head + m_length;
935 
936   count = range_end - range_start + 1;
937 
938   while (tail < end) {
939     if (*tail < 0) {
940       /* it's a range */
941       int32_t from = tail[1];
942       int32_t to = from + (-(tail[0]));
943       if (from < range_start) from = range_start;
944       if (to > range_end) to = range_end;
945 
946       if (to >= from) count -= (to - from + 1);
947 
948       tail += 2;
949     } else {
950       /* it's a literal */
951       if (*tail >= range_start && *tail <= range_end) count--;
952       tail++;
953     }
954     NS_ASSERTION(count >= 0, "invalid count");
955   }
956   return count;
957 }
958 
FirstMissingRange(int32_t min,int32_t max,int32_t * first,int32_t * last)959 int nsMsgKeySet::FirstMissingRange(int32_t min, int32_t max, int32_t* first,
960                                    int32_t* last) {
961   int32_t size;
962   int32_t* head;
963   int32_t* tail;
964   int32_t* end;
965   int32_t from = 0;
966   int32_t to = 0;
967   int32_t a;
968   int32_t b;
969 
970   NS_ASSERTION(first && last, "invalid parameter");
971   if (!first || !last) return -1;
972 
973   *first = *last = 0;
974 
975   NS_ASSERTION(min <= max && min > 0, "invalid min or max param");
976   if (min > max || min <= 0) return -1;
977 
978   size = m_length;
979   head = m_data;
980   tail = head;
981   end = head + size;
982 
983   while (tail < end) {
984     a = to + 1;
985     if (*tail < 0) { /* We got a range. */
986       from = tail[1];
987       to = from + (-(tail[0]));
988       tail += 2;
989     } else {
990       from = to = tail[0];
991       tail++;
992     }
993     b = from - 1;
994     /* At this point, [a,b] is the range of unread articles just before
995        the current range of read articles [from,to].  See if this range
996        intersects the [min,max] range we were given. */
997     if (a > max) return 0; /* It's hopeless; there are none. */
998     if (a <= b && b >= min) {
999       /* Ah-hah!  We found an intersection. */
1000       *first = a > min ? a : min;
1001       *last = b < max ? b : max;
1002       return 0;
1003     }
1004   }
1005   /* We found no holes in the newsrc that overlaps the range, nor did we hit
1006      something read beyond the end of the range.  So, the great infinite
1007      range of unread articles at the end of any newsrc line intersects the
1008      range we want, and we just need to return that. */
1009   a = to + 1;
1010   *first = a > min ? a : min;
1011   *last = max;
1012   return 0;
1013 }
1014 
1015 // I'm guessing we didn't include this because we didn't think we're going
1016 // to need it. I'm not so sure. I'm putting it in for now.
LastMissingRange(int32_t min,int32_t max,int32_t * first,int32_t * last)1017 int nsMsgKeySet::LastMissingRange(int32_t min, int32_t max, int32_t* first,
1018                                   int32_t* last) {
1019   int32_t size;
1020   int32_t* head;
1021   int32_t* tail;
1022   int32_t* end;
1023   int32_t from = 0;
1024   int32_t to = 0;
1025   int32_t a;
1026   int32_t b;
1027 
1028   NS_ASSERTION(first && last, "invalid null param");
1029   if (!first || !last) return -1;
1030 
1031   *first = *last = 0;
1032 
1033   NS_ASSERTION(min <= max && min > 0, "invalid min or max");
1034   if (min > max || min <= 0) return -1;
1035 
1036   size = m_length;
1037   head = m_data;
1038   tail = head;
1039   end = head + size;
1040 
1041   while (tail < end) {
1042     a = to + 1;
1043     if (*tail < 0) { /* We got a range. */
1044       from = tail[1];
1045       to = from + (-(tail[0]));
1046       tail += 2;
1047     } else {
1048       from = to = tail[0];
1049       tail++;
1050     }
1051     b = from - 1;
1052     /* At this point, [a,b] is the range of unread articles just before
1053        the current range of read articles [from,to]. See if this range
1054        intersects the [min,max] range we were given. */
1055     if (a > max)
1056       return 0; /* We're done. If we found something, it's already
1057                    sitting in [*first,*last]. */
1058     if (a <= b && b >= min) {
1059       /* Ah-hah! We found an intersection. */
1060       *first = a > min ? a : min;
1061       *last = b < max ? b : max;
1062       /* Continue on, looking for a later range. */
1063     }
1064   }
1065   if (to < max) {
1066     /* The great infinite range of unread articles at the end of any newsrc
1067        line intersects the range we want, and we just need to return that. */
1068     a = to + 1;
1069     *first = a > min ? a : min;
1070     *last = max;
1071   }
1072   return 0;
1073 }
1074 
1075 /**
1076  * Fill the passed in aArray with the keys in the message key set.
1077  */
ToMsgKeyArray(nsTArray<nsMsgKey> & aArray)1078 nsresult nsMsgKeySet::ToMsgKeyArray(nsTArray<nsMsgKey>& aArray) {
1079   int32_t size;
1080   int32_t* head;
1081   int32_t* tail;
1082   int32_t* end;
1083   int32_t last_art = -1;
1084 
1085   size = m_length;
1086   head = m_data;
1087   tail = head;
1088   end = head + size;
1089 
1090   while (tail < end) {
1091     int32_t from;
1092     int32_t to;
1093 
1094     if (*tail < 0) {
1095       /* it's a range */
1096       from = tail[1];
1097       to = from + (-(tail[0]));
1098       tail += 2;
1099     } else /* it's a literal */
1100     {
1101       from = *tail;
1102       to = from;
1103       tail++;
1104     }
1105     // The horrible news-hack used to adjust from to 1 if it was zero right
1106     // here, but there is no longer a consumer of this method with that
1107     // broken use-case.
1108     if (from <= last_art) from = last_art + 1;
1109     if (from <= to) {
1110       if (from < to) {
1111         for (int32_t i = from; i <= to; ++i) {
1112           aArray.AppendElement(i);
1113         }
1114       } else {
1115         aArray.AppendElement(from);
1116       }
1117       last_art = to;
1118     }
1119   }
1120 
1121   return NS_OK;
1122 }
1123 
1124 #ifdef DEBUG /* A lot of test cases for the above */
1125 
1126 #  define countof(x) (sizeof(x) / sizeof(*(x)))
1127 
test_decoder(const char * string)1128 void nsMsgKeySet::test_decoder(const char* string) {
1129   nsMsgKeySet set(string /* , NULL */);
1130   char* tmp;
1131   set.Output(&tmp);
1132   printf("\t\"%s\"\t--> \"%s\"\n", string, tmp);
1133   free(tmp);
1134 }
1135 
1136 #  define START(STRING) \
1137     string = STRING;    \
1138     if (!(set = nsMsgKeySet::Create(string))) abort()
1139 
1140 #  define FROB(N, PUSHP)                                                  \
1141     i = N;                                                                \
1142     if (!(NS_SUCCEEDED(set->Output(&s)))) abort();                        \
1143     printf("%3lu: %-58s %c %3lu =\n", (unsigned long)set->m_length, s,    \
1144            (PUSHP ? '+' : '-'), (unsigned long)i);                        \
1145     free(s);                                                              \
1146     if (PUSHP ? set->Add(i) < 0 : set->Remove(i) < 0) abort();            \
1147     if (!(NS_SUCCEEDED(set->Output(&s)))) abort();                        \
1148     printf("%3lu: %-58s optimized =\n", (unsigned long)set->m_length, s); \
1149     free(s);
1150 
1151 #  define END()                                              \
1152     if (!(NS_SUCCEEDED(set->Output(&s)))) abort();           \
1153     printf("%3lu: %s\n\n", (unsigned long)set->m_length, s); \
1154     free(s);                                                 \
1155     delete set;
1156 
test_adder(void)1157 void nsMsgKeySet::test_adder(void) {
1158   const char* string;
1159   nsMsgKeySet* set;
1160   char* s;
1161   int32_t i;
1162 
1163   START("0-70,72-99,105,107,110-111,117-200");
1164 
1165   FROB(205, true);
1166   FROB(206, true);
1167   FROB(207, true);
1168   FROB(208, true);
1169   FROB(208, true);
1170   FROB(109, true);
1171   FROB(72, true);
1172 
1173   FROB(205, false);
1174   FROB(206, false);
1175   FROB(207, false);
1176   FROB(208, false);
1177   FROB(208, false);
1178   FROB(109, false);
1179   FROB(72, false);
1180 
1181   FROB(72, true);
1182   FROB(109, true);
1183   FROB(208, true);
1184   FROB(208, true);
1185   FROB(207, true);
1186   FROB(206, true);
1187   FROB(205, true);
1188 
1189   FROB(205, false);
1190   FROB(206, false);
1191   FROB(207, false);
1192   FROB(208, false);
1193   FROB(208, false);
1194   FROB(109, false);
1195   FROB(72, false);
1196 
1197   FROB(100, true);
1198   FROB(101, true);
1199   FROB(102, true);
1200   FROB(103, true);
1201   FROB(106, true);
1202   FROB(104, true);
1203   FROB(109, true);
1204   FROB(108, true);
1205   END();
1206 
1207   // clang-format off
1208   START("1-6"); FROB(7, false); END();
1209   START("1-6"); FROB(6, false); END();
1210   START("1-6"); FROB(5, false); END();
1211   START("1-6"); FROB(4, false); END();
1212   START("1-6"); FROB(3, false); END();
1213   START("1-6"); FROB(2, false); END();
1214   START("1-6"); FROB(1, false); END();
1215   START("1-6"); FROB(0, false); END();
1216 
1217   START("1-3"); FROB(1, false); END();
1218   START("1-3"); FROB(2, false); END();
1219   START("1-3"); FROB(3, false); END();
1220 
1221   START("1,3,5-7,9,10"); FROB(5, false); END();
1222   START("1,3,5-7,9,10"); FROB(6, false); END();
1223   START("1,3,5-7,9,10"); FROB(7, false); FROB(7, true); FROB(8, true);
1224   FROB (4, true); FROB (2, false); FROB (2, true);
1225 
1226   FROB (4, false); FROB (5, false); FROB (6, false); FROB (7, false);
1227   FROB (8, false); FROB (9, false); FROB (10, false); FROB (3, false);
1228   FROB (2, false); FROB (1, false); FROB (1, false); FROB (0, false);
1229   END();
1230   // clang-format on
1231 }
1232 
1233 #  undef START
1234 #  undef FROB
1235 #  undef END
1236 
1237 #  define START(STRING) \
1238     string = STRING;    \
1239     if (!(set = nsMsgKeySet::Create(string))) abort()
1240 
1241 #  define FROB(N, M)                                                       \
1242     i = N;                                                                 \
1243     j = M;                                                                 \
1244     if (!(NS_SUCCEEDED(set->Output(&s)))) abort();                         \
1245     printf("%3lu: %-58s + %3lu-%3lu =\n", (unsigned long)set->m_length, s, \
1246            (unsigned long)i, (unsigned long)j);                            \
1247     free(s);                                                               \
1248     switch (set->AddRange(i, j)) {                                         \
1249       case 0:                                                              \
1250         printf("(no-op)\n");                                               \
1251         break;                                                             \
1252       case 1:                                                              \
1253         break;                                                             \
1254       default:                                                             \
1255         abort();                                                           \
1256     }                                                                      \
1257     if (!(NS_SUCCEEDED(set->Output(&s)))) abort();                         \
1258     printf("%3lu: %-58s\n", (unsigned long)set->m_length, s);              \
1259     free(s);
1260 
1261 #  define END()                                              \
1262     if (!(NS_SUCCEEDED(set->Output(&s)))) abort();           \
1263     printf("%3lu: %s\n\n", (unsigned long)set->m_length, s); \
1264     free(s);                                                 \
1265     delete set;
1266 
test_ranges(void)1267 void nsMsgKeySet::test_ranges(void) {
1268   const char* string;
1269   nsMsgKeySet* set;
1270   char* s;
1271   int32_t i;
1272   int32_t j;
1273 
1274   START("20-40,72-99,105,107,110-111,117-200");
1275 
1276   FROB(205, 208);
1277   FROB(50, 70);
1278   FROB(0, 10);
1279   FROB(112, 113);
1280   FROB(101, 101);
1281   FROB(5, 75);
1282   FROB(103, 109);
1283   FROB(2, 20);
1284   FROB(1, 9999);
1285 
1286   END();
1287 
1288 #  undef START
1289 #  undef FROB
1290 #  undef END
1291 }
1292 
1293 #  define TEST(N)                                                    \
1294     if (!with_cache) set->m_cached_value = -1;                       \
1295     if (!(NS_SUCCEEDED(set->Output(&s)))) abort();                   \
1296     printf(" %3d = %s\n", N, (set->IsMember(N) ? "true" : "false")); \
1297     free(s);
1298 
test_member(bool with_cache)1299 void nsMsgKeySet::test_member(bool with_cache) {
1300   nsMsgKeySet* set;
1301   char* s;
1302 
1303   const char* st1 = "1-70,72-99,105,107,110-111,117-200";
1304   printf("\n\nTesting %s (with%s cache)\n", st1, with_cache ? "" : "out");
1305   if (!(set = Create(st1))) {
1306     abort();
1307   }
1308 
1309   TEST(-1);
1310   TEST(0);
1311   TEST(1);
1312   TEST(20);
1313 
1314   delete set;
1315   const char* st2 = "0-70,72-99,105,107,110-111,117-200";
1316   printf("\n\nTesting %s (with%s cache)\n", st2, with_cache ? "" : "out");
1317   if (!(set = Create(st2))) {
1318     abort();
1319   }
1320 
1321   TEST(-1);
1322   TEST(0);
1323   TEST(1);
1324   TEST(20);
1325   TEST(69);
1326   TEST(70);
1327   TEST(71);
1328   TEST(72);
1329   TEST(73);
1330   TEST(74);
1331   TEST(104);
1332   TEST(105);
1333   TEST(106);
1334   TEST(107);
1335   TEST(108);
1336   TEST(109);
1337   TEST(110);
1338   TEST(111);
1339   TEST(112);
1340   TEST(116);
1341   TEST(117);
1342   TEST(118);
1343   TEST(119);
1344   TEST(200);
1345   TEST(201);
1346   TEST(65535);
1347 
1348   delete set;
1349 }
1350 
1351 #  undef TEST
1352 
1353 // static void
1354 // test_newsrc (char *file)
1355 // {
1356 //   FILE *fp = fopen (file, "r");
1357 //   char buf [1024];
1358 //   if (! fp) abort ();
1359 //   while (fgets (buf, sizeof (buf), fp))
1360 //   {
1361 //     if (!strncmp (buf, "options ", 8))
1362 //     fwrite (buf, 1, strlen (buf), stdout);
1363 //     else
1364 //     {
1365 //       char *sep = buf;
1366 //       while (*sep != 0 && *sep != ':' && *sep != '!')
1367 //       sep++;
1368 //       if (*sep) sep++;
1369 //       while (isspace (*sep)) sep++;
1370 //       fwrite (buf, 1, sep - buf, stdout);
1371 //       if (*sep)
1372 //       {
1373 //         char *s;
1374 //         msg_NewsRCSet *set = msg_parse_newsrc_set (sep, &allocinfo);
1375 //         if (! set)
1376 //         abort ();
1377 //         if (! msg_OptimizeNewsRCSet (set))
1378 //         abort ();
1379 //         if (! ((s = msg_format_newsrc_set (set))))
1380 //         abort ();
1381 //         msg_free_newsrc_set (set, &allocinfo);
1382 //         fwrite (s, 1, strlen (s), stdout);
1383 //         free (s);
1384 //         fwrite ("\n", 1, 1, stdout);
1385 //       }
1386 //     }
1387 //   }
1388 //   fclose (fp);
1389 // }
1390 
RunTests()1391 void nsMsgKeySet::RunTests() {
1392   test_decoder("");
1393   test_decoder(" ");
1394   test_decoder("0");
1395   test_decoder("1");
1396   test_decoder("123");
1397   test_decoder(" 123 ");
1398   test_decoder(" 123 4");
1399   test_decoder(" 1,2, 3, 4");
1400   test_decoder("0-70,72-99,100,101");
1401   test_decoder(" 0-70 , 72 - 99 ,100,101 ");
1402   test_decoder("0 - 268435455");
1403   /* This one overflows - we can't help it.
1404    test_decoder ("0 - 4294967295"); */
1405 
1406   test_adder();
1407 
1408   test_ranges();
1409 
1410   test_member(false);
1411   test_member(true);
1412 
1413   // test_newsrc ("/u/montulli/.newsrc");
1414   /* test_newsrc ("/u/jwz/.newsrc");*/
1415 }
1416 
1417 #endif /* DEBUG */
1418