1 // Copyright 2009 The Archiveopteryx Developers <info@aox.org>
2 
3 #include "estring.h"
4 
5 #include "allocator.h"
6 
7 // stderr, fprintf
8 #include <stdio.h>
9 // strlen
10 #include <string.h>
11 
12 
13 /*! \class EStringData estring.h
14 
15     This private helper class contains the actual string data. It has
16     three fields, all accessible only to string. The only noteworthy
17     field is max, which is 0 in the case of a shared/read-only string,
18     and nonzero in the case of a string which can be modified.
19 */
20 
21 
22 /*! \fn EStringData::EStringData()
23 
24     Creates a zero-length string. This is naturally read-only.
25 */
26 
/*! Creates a new EStringData with a capacity of \a bytes bytes. */
28 
EStringData(int bytes)29 EStringData::EStringData( int bytes )
30     : str( 0 ), len( 0 ), max( bytes )
31 {
32     if ( str )
33         str = (char*)Allocator::alloc( max, 0 );
34 }
35 
36 
operator new(size_t ownSize,uint extra)37 void * EStringData::operator new( size_t ownSize, uint extra )
38 {
39     return Allocator::alloc( ownSize + extra, 1 );
40 }
41 
42 
43 /*! \class EString estring.h
44     An email-oriented 8-bit string class.
45 
46     The string data are counted, so null bytes are allowed, and most
47     operations are very fast.
48 
49     The data structure uses a simplified variant of reference counting,
50     where only "one" and "many" are possible. The detach() function
51     ensures that the count is "one" afterwards. Many functions leave
52     the count on "many", even ones such as mid().
53 
54     The usual string functions are implemented, along with a variety
55     of email-specific operations such as eQP(), deQP(), needsQP(),
56     e64(). boring() returns true if the string can be used unquoted in
57     e.g. MIME, quoted() quotes it. upper() and lower() have a third
58     sibling, headerCased(). simplified() and trimmed() remove white
59     space in ways email often needs.
60 
61     Several people have asked why we use this class instead of
62     std::string, and UString instead of std::basic_string<uint>. At
63     least one told us we were wrong and wheel reinvention and so on.
64 
65     The reason is that working with email using EString is fairly
66     pleasant. Doing the same kind of work with std::string is a pain.
67 
68     Both wheels are round. EString carries its load and moves it
69     smoothly along.
70 */
71 
72 /*! Creates an empty EString */
73 
EString()74 EString::EString()
75     : d( 0 )
76 {
77 }
78 
/*! Creates an EString from the NUL-terminated string \a s.
    The NUL is not copied. */
81 
EString(const char * s)82 EString::EString( const char *s )
83     : d( 0 )
84 {
85     *this = s;
86 }
87 
/*! Creates an EString from the first \a n bytes of \a s, which may
    contain NULs. */
90 
EString(const char * s,uint n)91 EString::EString( const char *s, uint n )
92     : d( 0 )
93 {
94     append( s, n );
95 }
96 
97 
98 /*! Creates a copy of \a s. */
99 
EString::EString( const EString &s )
    : Garbage(), d( 0 )
{
    // Copying is delegated to the EString assignment operator.
    operator=( s );
}
105 
106 
107 /*! \fn void EString::detach()
108 
109     Ensures that the string is modifiable. All EString functions call
110     this prior to modifying the string. */
111 
112 
113 /*! Destroys the string.
114 
115     Because EString is used so much, and can eat up such vast amounts
116     of memory so quickly, this destructor does something: If the
117     string is the sole owner of its data, it frees them.
118 
119     As of April 2005, the return values of data() or cstr() are NO
120     LONGER valid after a string has gone out of scope or otherwise been
121     lost.
122 */
123 
~EString()124 EString::~EString()
125 {
126     if ( d && d->max )
127         Allocator::dealloc( d );
128     d = 0;
129 }
130 
131 
132 /*! Deletes \a p. (This function exists only so that gcc -O3 doesn't
133     decide that EString objects don't need destruction.)
134 */
135 
operator delete(void * p)136 void EString::operator delete( void *p )
137 {
138     EStringData * & d = ((EString *)p)->d;
139     if ( d && d->max )
140         Allocator::dealloc( d );
141     d = 0;
142 }
143 
144 
145 /*! Copies \a other to this string and returns a reference to this
146     string. */
147 
EString & EString::operator=( const EString & other )
{
    // Both strings point at the same data block afterwards; nothing
    // is copied.
    d = other.d;
    if ( !d )
        return *this;
    // Setting max to 0 marks the block as no longer modifiable.
    d->max = 0;
    return *this;
}
155 
156 
157 /*! Copies \a s to this string and returns a reference to this
158     string. If \a s is a null pointer, the result is an empty string. */
159 
operator =(const char * s)160 EString & EString::operator=( const char * s )
161 {
162     if ( !s || !*s ) {
163         d = 0;
164         return *this;
165     }
166 
167     uint len = strlen( s );
168     if ( d && d->max )
169         d->len = 0;
170     reserve( len );
171     d->len = len;
172     memmove( d->str, s, d->len );
173     return *this;
174 }
175 
176 
177 /*! \fn uint EString::length() const
178 
179     Returns the length of the string. The length does not include any
180     terminator or padding. */
181 
182 
183 /*! \fn uint EString::capacity() const
184 
185     Returns the capacity of the string variable, that is, how long the
186     string can be before it has to allocate memory.
187 */
188 
189 
190 /*! \fn const char *EString::data() const
191 
192     Returns a pointer to the string's byte representation, which is
193     NOT necessarily zero-terminated. */
194 
195 
196 /*! Returns the zero-terminated byte representation of the
197     string. Note that even though the return value is zero-terminated,
198     it can also contain null bytes in the middle.
199 
200     Even though this function modifies memory, it doesn't detach(),
201     since it doesn't modify the string. However, in most cases its
202     call to reserve() causes a detach().
203 */
204 
cstr()205 const char * EString::cstr()
206 {
207     reserve( length()+1 );
208     d->str[d->len] = '\0';
209     return data();
210 }
211 
212 
213 /*! This const version of cstr() is the same as the non-const version
214     above. The only difference is that it can be called on a const
215     object, and that it may cause some memory allocation elsewhere.
216 */
217 
cstr() const218 const char * EString::cstr() const
219 {
220     if ( d && d->max > d->len ) {
221         d->str[d->len] = '\0';
222         return data();
223     }
224     EString tmp;
225     tmp.reserve( length() + 1 );
226     tmp = *this;
227     return tmp.cstr();
228 }
229 
230 
231 /*! Returns a copy of this string where all upper-case letters (A-Z -
232     this is ASCII only) have been changed to lower case. */
233 
lower() const234 EString EString::lower() const
235 {
236     EString result( *this );
237     uint i = 0;
238     while ( i < result.length() ) {
239         if ( result[i] >= 'A' && result[i] <= 'Z' ) {
240             result.detach();
241             result.d->str[i] = result.d->str[i] + 32;
242         }
243         i++;
244     }
245     return result;
246 }
247 
248 
249 /*! Returns a copy of this string where all lower-case letters (a-z -
250     this is ASCII only) have been changed to upper case. */
251 
upper() const252 EString EString::upper() const
253 {
254     EString result( *this );
255     uint i = 0;
256     while ( i < result.length() ) {
257         if ( result[i] >= 'a' && result[i] <= 'z' ) {
258             result.detach();
259             result.d->str[i] = result.d->str[i] - 32;
260         }
261         i++;
262     }
263     return result;
264 }
265 
266 
267 /*! Returns a copy of this string where all letters have been changed
268   to conform to typical mail header practice: Letters following digits
269   and other letters are lower-cased. Other letters are upper-cased
270   (notably including the very first character). */
271 
headerCased() const272 EString EString::headerCased() const
273 {
274     EString result( *this );
275     uint i = 0;
276     bool u = true;
277     while ( i < length() ) {
278         if ( u && result[i] >= 'a' && result[i] <= 'z' ) {
279             result.detach();
280             result.d->str[i] = result[i] - 32;
281         }
282         else if ( !u && result[i] >= 'A' && result[i] <= 'Z' ) {
283             result.detach();
284             result.d->str[i] = result[i] + 32;
285         }
286         if ( ( result[i] >= 'A' && result[i] <= 'Z' ) ||
287              ( result[i] >= 'a' && result[i] <= 'z' ) ||
288              ( result[i] >= '0' && result[i] <= '9' ) )
289             u = false;
290         else
291             u = true;
292         i++;
293     }
294     return result;
295 }
296 
297 
298 
299 
300 
/*! Returns the position of the first occurrence of \a c on or after \a i
    in this string, or -1 if there is none.
*/
304 
find(char c,int i) const305 int EString::find( char c, int i ) const
306 {
307     while ( i < (int)length() && at( i ) != c )
308         i++;
309     if ( i < (int)length() )
310         return i;
311     return -1;
312 }
313 
314 
/*! Returns the position of the first occurrence of \a s on or after \a i
    in this string, or -1 if there is none.
*/
318 
find(const EString & s,int i) const319 int EString::find( const EString & s, int i ) const
320 {
321     uint j = 0;
322     while ( j < s.length() && i+j < length() ) {
323         if ( d->str[i+j] == s.d->str[j] ) {
324             j++;
325         }
326         else {
327             j = 0;
328             i++;
329         }
330     }
331     if ( j == s.length() )
332         return i;
333     return -1;
334 }
335 
336 
337 /*! Returns section \a n of this string, where a section is defined as
338     a run of sequences separated by \a s. If \a s is the empty string
339     or \a n is 0, section() returns this entire string. If this string
340     contains fewer instances of \a s than \a n (ie. section \a n is
341     after the end of the string), section returns an empty string.
342 */
343 
EString EString::section( const EString & s, uint n ) const
{
    if ( n == 0 || s.isEmpty() )
        return *this;

    // begin is the start of the section currently being looked at
    int begin = 0;
    for ( ; n > 0 && begin <= (int)length(); n-- ) {
        int end = find( s, begin );
        if ( end < 0 )
            end = length();
        if ( n == 1 )
            return mid( begin, end - begin );
        // step over the separator to the start of the next section
        begin = end + s.length();
    }
    return "";
}
361 
362 
363 /*! Appends \a other to this string. */
364 
append(const EString & other)365 void EString::append( const EString & other )
366 {
367     if ( !other.length() )
368         return;
369     if ( !length() && ( !modifiable() || d->max < other.length() ) ) {
370         // if this isn't modifiable, we just make a copy of the other
371         // string. only sensible thing to do. if it's modifiable, but
372         // we don't have enough bytes, we also just glue ourselves
373         // onto the other. maybe we'll need to copy later, but maybe
374         // not.
375         *this = other;
376         return;
377     }
378     reserve( length() + other.length() );
379     memmove( d->str+d->len, other.d->str, other.d->len );
380     d->len += other.d->len;
381 }
382 
383 
384 /*! \overload
385     This version of append() appends \a num raw bytes from memory
386     \a base. If \a base is null, this function does nothing.
387 */
388 
append(const char * base,uint num)389 void EString::append( const char * base, uint num )
390 {
391     if ( !base || !num )
392         return;
393 
394     reserve( length() + num );
395     memmove( d->str + d->len, base, num );
396     d->len += num;
397 }
398 
399 
400 /*! \overload
401 
402     This version of append() appends the null-terminated string \a s,
403     or does nothing if \a s is null.
404 */
405 
append(const char * s)406 void EString::append( const char * s )
407 {
408     if ( s )
409         append( s, strlen( s ) );
410 }
411 
412 
413 /*! \overload
414     This version of append() appends the single character \a c.
415 */
416 
append(char c)417 void EString::append( char c )
418 {
419     reserve( length() + 1 );
420     d->str[d->len] = c;
421     d->len++;
422 }
423 
424 
425 /*! Prepends \a other to this string. */
426 
prepend(const EString & other)427 void EString::prepend( const EString & other )
428 {
429     if ( other.isEmpty() )
430         return;
431     reserve( length() + other.length() );
432     memmove( d->str + other.length(), d->str, length() );
433     memmove( d->str, other.d->str, other.length() );
434     setLength( length() + other.length() );
435 }
436 
437 
appendNumber(int n,int base)438 void EString::appendNumber( int n, int base )
439 {
440     if ( n < 0 ) {
441         append( '-' );
442         n = -n;
443     }
444     appendNumber( (uint)n, base );
445 }
446 
appendNumber(uint n,int base)447 void EString::appendNumber( uint n, int base )
448 {
449     EString s( fromNumber( n, base ) );
450 
451     if ( n > 0 )
452         append( s );
453     else
454         append( "0" );
455 }
456 
457 /*! Ensures that there is at least \a num bytes available in this
458     string. This implicitly causes the string to become modifiable and
459     have a nonzero number of available bytes.
460 
461     After calling reserve(), capacity() is at least as large as \a
462     num, while length() has not changed.
463 */
464 
reserve(uint num)465 void EString::reserve( uint num )
466 {
467     if ( num < length() )
468         num = length();
469     if ( !num )
470         num = 1;
471     if ( !d || d->max < num )
472         reserve2( num );
473 }
474 
475 /*! Equivalent to reserve(). reserve( \a num ) calls this function to
476     do the heavy lifting. This function is not inline, while reserve()
477     is, and calls to this function should be interesting wrt. memory
478     allocation statistics.
479 
    No one except reserve() should call reserve2().
481 */
482 
reserve2(uint num)483 void EString::reserve2( uint num )
484 {
485     num = Allocator::rounded( num + sizeof( EStringData ) ) - sizeof( EStringData );
486 
487     EStringData * freeable = 0;
488     if ( d && d->max )
489         freeable = d;
490 
491     EStringData * nd = new( num ) EStringData( 0 );
492     nd->max = num;
493     nd->str = sizeof( EStringData ) + (char*)nd;
494     if ( d )
495         nd->len = d->len;
496     if ( nd->len > num )
497         nd->len = num;
498     if ( d && d->len )
499         memmove( nd->str, d->str, nd->len );
500     d = nd;
501 
502     if ( freeable )
503         Allocator::dealloc( freeable );
504 }
505 
506 
507 /*! Ensures that the string's length is either \a l or length(),
508     whichever is smaller. If \a l is 0 (the default), the string will be
509     empty after the function is called.
510 */
511 
truncate(uint l)512 void EString::truncate( uint l )
513 {
514     if ( !l ) {
515         if ( d && d->max )
516             Allocator::dealloc( d );
517         d = 0;
518     }
519     else if ( l < length() ) {
520         detach();
521         d->len = l;
522     }
523 }
524 
525 
526 /*! Ensures that the string's length is \a l. If \a l is 0, the string
527     will be empty after the function is called. If \a l is longer than
528     the string used to be, the new part is uninitialised.
529 */
530 
setLength(uint l)531 void EString::setLength( uint l )
532 {
533     reserve( l );
534     if ( l )
535         d->len = d->max;
536     truncate( l );
537 }
538 
539 
540 /*! Returns a string containing the data starting at position \a start
541     of this string, extending for \a num bytes. \a num may be left out,
542     in which case the rest of the string is returned.
543 
544     If \a start is too large, an empty string is returned.
545 */
546 
mid(uint start,uint num) const547 EString EString::mid( uint start, uint num ) const
548 {
549     if ( !d )
550         num = 0;
551     else if ( num > d->len || start + num > d->len )
552         num = d->len - start;
553 
554     EString result;
555     if ( !num || start >= length() )
556         return result;
557 
558     d->max = 0;
559     result.d = new EStringData;
560     result.d->str = d->str + start;
561     result.d->len = num;
562     return result;
563 }
564 
565 
/*! Returns true if the string is quoted with \a c (default '"') as
    quote character and \a q (default '\') as escape character. \a c
    and \a q may be the same. */
569 
isQuoted(char c,char q) const570 bool EString::isQuoted( char c, char q ) const
571 {
572     if ( length() < 2 || at( 0 ) != c || at( length() - 1 ) != c )
573         return false;
574     // skip past double escapes
575     uint i = length() - 2;
576     while ( i > 1 && d->str[i] == q && d->str[i-1] == q )
577         i = i - 2;
578     // empty string left?
579     if ( i == 0 )
580         return true;
581     // trailing quote escaped?
582     if ( d->str[i] == q )
583         return false;
584     return true;
585 }
586 
587 
/*! Returns the unquoted representation of the string if it isQuoted()
    and the string itself otherwise.

    \a c at the start and end are removed; any occurrence of \a c
    within the string is left alone; an occurrence of \a q followed by
    \a c is converted into just \a c.
*/
595 
unquoted(char c,char q) const596 EString EString::unquoted( char c, char q ) const
597 {
598     if ( !isQuoted( c, q ) )
599         return *this;
600     EString r;
601     r.reserve( length() );
602     uint i = 1;
603     while ( i < length()-1 ) {
604         if ( d->str[i] == q )
605             i++;
606         r.append( d->str[i] );
607         i++;
608     }
609     return r;
610 }
611 
612 
/*! Returns a version of this string quoted with \a c, and where any
    occurrences of \a c or \a q are escaped with \a q.
*/
616 
quoted(char c,char q) const617 EString EString::quoted( char c, char q ) const
618 {
619     EString r;
620     r.reserve( length()+2 );
621     r.append( c );
622     uint i = 0;
623     while ( i < length() ) {
624         if ( d->str[i] == c || d->str[i] == q )
625             r.append( q );
626         r.append( d->str[i] );
627         i++;
628     }
629     r.append( c );
630     return r;
631 }
632 
633 
634 /*! Returns true if this string is really boring, and false if it's
635     empty or contains at least one character that may warrant quoting
636     in some context. So far RFC 822 atoms, 2822 atoms, IMAP atoms and
637     MIME tokens are considered.
638 
639     This function considers the intersection of those character
640     classes to be the Totally boring subset. If \a b is not its
641     default value, it may include other characters.
642 */
643 
boring(Boring b) const644 bool EString::boring( Boring b ) const
645 {
646     if ( isEmpty() )
647         return false; // empty strings aren't boring - they may need quoting
648     uint i = 0;
649     bool exciting = false;
650     while ( i < length() && !exciting ) {
651         switch ( d->str[i] ) {
652         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
653         case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
654         case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
655         case 'V': case 'W': case 'X': case 'Y': case 'Z':
656 
657         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
658         case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
659         case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
660         case 'v': case 'w': case 'x': case 'y': case 'z':
661 
662         case '0': case '1': case '2': case '3': case '4':
663         case '5': case '6': case '7': case '8': case '9':
664 
665         case '!':
666         case '#':
667         case '$':
668         case '&':
669         case '+':
670         case '-':
671             break;
672 
673         case '.':
674             if ( b != MIME )
675                 exciting = true;
676             break;
677 
678         default:
679             exciting = true;
680             break;
681         }
682         i++;
683     }
684     if ( exciting ) // if we saw an exiting character...
685         return false;
686     return true;
687 }
688 
689 
690 /*! Returns a copy of this string where each run of whitespace is
691     compressed to a single ASCII 32, and where leading and trailing
692     whitespace is removed altogether.
693 */
694 
simplified() const695 EString EString::simplified() const
696 {
697     // scan for the first nonwhitespace character
698     uint i = 0;
699     uint first = 0;
700     while ( i < length() && first == i ) {
701         char c = d->str[i];
702         if ( c == 9 || c == 10 || c == 13 || c == 32 )
703             first++;
704         i++;
705     }
706     // scan on to find the last nonwhitespace character and detect any
707     // sequences of two or more whitespace characters within the
708     // string.
709     uint last = first;
710     uint spaces = 0;
711     bool identity = true;
712     while ( identity && i < length() ) {
713         char c = d->str[i];
714         if ( c == 9 || c == 10 || c == 13 || c == 32 ) {
715             spaces++;
716         }
717         else {
718             if ( spaces > 1 )
719                 identity = false;
720             spaces = 0;
721             last = i;
722         }
723         i++;
724     }
725     if ( identity )
726         return mid( first, last+1-first );
727 
728     EString result;
729     result.reserve( length() );
730     i = 0;
731     spaces = 0;
732     while ( i < length() ) {
733         char c = d->str[i];
734         if ( c == 9 || c == 10 || c == 13 || c == 32 ) {
735             spaces++;
736         }
737         else {
738             if ( spaces && !result.isEmpty() )
739                 result.append( ' ' );
740             spaces = 0;
741             result.append( c );
742         }
743         i++;
744     }
745     return result;
746 }
747 
748 
749 /*! Returns a copy of this string where leading and trailing
750     whitespace have been removed.
751 */
752 
trimmed() const753 EString EString::trimmed() const
754 {
755     uint i = 0;
756     uint first = length();
757     uint last = 0;
758     while ( i < length() ) {
759         char c = d->str[i];
760         if ( c != 9 && c != 10 && c != 13 && c != 32 ) {
761             if ( i < first )
762                 first = i;
763             if ( i > last )
764                 last = i;
765         }
766         i++;
767     }
768 
769     if ( last >= first )
770         return mid( first, last + 1 - first );
771 
772     EString empty;
773     return empty;
774 }
775 
776 
777 /*! Returns a copy of this EString with at most one trailing LF or CRLF
778     removed. If there's more than one LF or CRLF, the remainder are
779     left.
780 */
781 
stripCRLF() const782 EString EString::stripCRLF() const
783 {
784     uint n = 0;
785     if ( endsWith( "\r\n" ) )
786         n = 2;
787     else if ( endsWith( "\n" ) )
788         n = 1;
789 
790     return mid( 0, length() - n );
791 }
792 
793 
794 /*! Returns the lowercase-hexadecimal representation of the string. */
795 
hex() const796 EString EString::hex() const
797 {
798     EString s;
799     s.reserve( length()*2 );
800 
801     uint i = 0;
802     while ( i < length() ) {
803         uint x = d->str[i];
804         s.appendNumber( x/16, 16 );
805         s.appendNumber( x&15, 16 );
806         i++;
807     }
808 
809     return s;
810 }
811 
const EString operator+( const EString & a, const EString & b )
{
    // Reserve room for both operands up front, then append them in
    // order.
    EString sum;
    sum.reserve( a.length() + b.length() );
    sum.append( a );
    sum.append( b );
    return sum;
}
820 
821 
822 /*! Returns true if this string starts with \a prefix, and false if it
823     does not.
824 */
825 
startsWith(const EString & prefix) const826 bool EString::startsWith( const EString & prefix ) const
827 {
828     return length() >= prefix.length() &&
829         prefix == mid( 0, prefix.length() );
830 }
831 
832 
833 /*! Returns true if this string starts with \a prefix, and false if it
834     does not.
835 */
836 
startsWith(const char * prefix) const837 bool EString::startsWith( const char * prefix ) const
838 {
839     if ( !prefix )
840         return true;
841     uint i = 0;
842     while ( prefix[i] && prefix[i] == at( i ) )
843         i++;
844     if ( prefix[i] )
845         return false;
846     return true;
847 }
848 
849 
850 /*! Returns true if this string ends with \a suffix, and false if it
851     does not.
852 */
853 
endsWith(const EString & suffix) const854 bool EString::endsWith( const EString & suffix ) const
855 {
856     return length() >= suffix.length() &&
857         suffix == mid( length()-suffix.length() );
858 }
859 
860 
861 /*! Returns true if this string ends with \a suffix, and false if it
862     does not.
863 */
864 
endsWith(const char * suffix) const865 bool EString::endsWith( const char * suffix ) const
866 {
867     if ( !suffix )
868         return true;
869     uint l = strlen( suffix );
870     if ( l > length() )
871         return false;
872     uint i = 0;
873     while ( i < l && suffix[i] == d->str[d->len - l + i] )
874         i++;
875     if ( i < l )
876         return false;
877     return true;
878 }
879 
880 
881 /*! Returns the number encoded by this string, and sets \a *ok to true
882     if that number is valid, or to false if the number is invalid. By
883     default the number is encoded in base 10, if \a base is specified
884     that base is used. \a base must be at least 2 and at most 36.
885 
886     If the number is invalid (e.g. negative), number() returns 0.
887 
888     If \a ok is a null pointer, it is not modified.
889 */
890 
number(bool * ok,uint base) const891 uint EString::number( bool * ok, uint base ) const
892 {
893     uint i = 0;
894     uint n = 0;
895 
896     bool good = !isEmpty();
897     while ( good && i < length() ) {
898         if ( d->str[i] < '0' || d->str[i] > 'z' )
899             good = false;
900 
901         uint digit = d->str[i] - '0';
902 
903         // hex or something?
904         if ( digit > 9 ) {
905             uint c = d->str[i];
906             if ( c > 'Z' )
907                 c = c - 32;
908             digit = c - 'A' + 10;
909         }
910 
911         // is the digit too large?
912         if ( digit >= base )
913             good = false;
914 
915         // Would n overflow if we multiplied by 10 and added digit?
916         if ( n > UINT_MAX/base )
917             good = false;
918         n *= base;
919         if ( n >= (UINT_MAX - UINT_MAX % base) && digit > (UINT_MAX % base) )
920             good = false;
921         n += digit;
922 
923         i++;
924     }
925 
926     if ( !good )
927         n = 0;
928 
929     if ( ok )
930         *ok = good;
931 
932     return n;
933 }
934 
935 
936 /*! Returns a string representing the number \a n in the \a base
937     system, which is 10 (decimal) by default and must be in the range
938     2-36.
939 
940     For 0, "0" is returned.
941 
942     For bases 11-36, lower-case letters are used for the digits beyond
943     9.
944 */
945 
fromNumber(int64 n,uint base)946 EString EString::fromNumber( int64 n, uint base )
947 {
948     EString r;
949     r.appendNumber( n, base );
950     return r;
951 }
952 
953 
954 
955 /*! Converts \a n to a number in the \a base system and appends the
956     result to this string. If \a n is 0, "0" is appended.
957 
958     Uses lower-case for digits above 9.
959 */
960 
appendNumber(int64 n,uint base)961 void EString::appendNumber( int64 n, uint base )
962 {
963     int64 top = 1;
964     while ( top * base <= n )
965         top = base * top;
966     while ( top ) {
967         uint d = ( n / top ) % base;
968         char  c = '0' + d;
969         if ( d > 9 )
970             c = 'a' + d - 10;
971         append( c );
972         top = top / base;
973     }
974 }
975 
976 
977 /*! Returns an \a e encoded version of this EString. If \a e is Base64,
978     then \a n specifies the maximum line length.
979     The default is 0, i.e. no limit.
980 
981     This function does not support Uuencode. If \a e is Uuencode, it
982     returns the input string.
983 */
984 
encoded(Encoding e,uint n) const985 EString EString::encoded( Encoding e, uint n ) const
986 {
987     if ( e == Base64 )
988         return e64( n );
989     else if ( e == QP )
990         return eQP( false, n > 0 );
991     return *this;
992 }
993 
994 
995 /*! Returns a \a e decoded version of this EString. */
996 
decoded(Encoding e) const997 EString EString::decoded( Encoding e ) const
998 {
999     if ( e == Base64 )
1000         return de64();
1001     else if ( e == QP )
1002         return deQP();
1003     else if ( e == Uuencode )
1004         return deUue();
1005     return *this;
1006 }
1007 
1008 
1009 /*! Returns a version of this EString with absolutely nothing changed.
1010     (This function is eventually intended to percent-escape URIs, the
1011     opposite of deURI().)
1012 */
1013 
eURI() const1014 EString EString::eURI() const
1015 {
1016     return *this;
1017 }
1018 
1019 
1020 /*! Returns a version of this EString with every %xx escape replaced with
1021     the corresponding character (as used to encode URIs). Invalid escape
1022     sequences are left unchanged, so this function cannot be used for
1023     input from potentially malevolent sources.
1024 */
1025 
deURI() const1026 EString EString::deURI() const
1027 {
1028     uint l = length();
1029 
1030     EString s;
1031     s.reserve( l );
1032 
1033     uint p = 0;
1034     while ( p < l ) {
1035         char c = d->str[p];
1036         if ( c == '%' ) {
1037             bool ok;
1038             uint n = mid( p+1, 2 ).number( &ok, 16 );
1039             if ( ok && l > p + 2 ) {
1040                 p += 2;
1041                 c = (char)n;
1042             }
1043         }
1044         s.append( c );
1045         p++;
1046     }
1047 
1048     return s;
1049 }
1050 
1051 
/*! An implementation of uudecode, sufficient to handle some
    occurrences of "content-transfer-encoding: x-uuencode"
    seen. Possibly not correct according to POSIX 1003.2b, who knows.
*/
1056 
deUue() const1057 EString EString::deUue() const
1058 {
1059     if ( isEmpty() )
1060         return *this;
1061     uint i = 0;
1062     if ( !startsWith( "begin" ) ) {
1063         int begin = find( "\nbegin" );
1064         if ( begin < 0 )
1065             begin = find( "\rbegin" );
1066         if ( begin < 0 )
1067             return *this;
1068         i = (uint)begin+1;
1069     }
1070     EString r;
1071     while ( i < d->len ) {
1072         // step 0. skip over nonspace until CR/LF
1073         while ( i < d->len && d->str[i] != 13 && d->str[i] != 10 )
1074             i++;
1075         // step 1. skip over whitespace to the next length marker.
1076         while ( i < d->len &&
1077                 ( d->str[i] == 9 || d->str[i] == 10 ||
1078                   d->str[i] == 13 || d->str[i] == 32 ) )
1079             i++;
1080         // step 2. the length byte, or the end line.
1081         uint linelength = 0;
1082         if ( i < d->len ) {
1083             char c = d->str[i];
1084             if ( c == 'e' && i < d->len - 2 &&
1085                  d->str[i+1] == 'n' && d->str[i+2] == 'd' &&
1086                  ( i + 3 == d->len ||
1087                    d->str[i+3] == 13 || d->str[i+3] == 10 ||
1088                    d->str[i+3] == 9 || d->str[i+3] == 32 ) )
1089                 return r;
1090             else if ( c < 32 )
1091                 return *this;
1092             else
1093                 linelength = (c - 32) & 63;
1094             i++;
1095         }
1096         // step 3. the line data. we assume it's in groups of 4 tokens.
1097         while ( linelength && i < d->len ) {
1098             char c0 = 0, c1 = 0, c2 = 0, c3 = 0;
1099             if ( i < d->len )
1100                 c0 = 63 & ( d->str[i] - 32 );
1101             if ( i+1 < d->len )
1102                 c1 = 63 & ( d->str[i+1] - 32 );
1103             if ( i+2 < d->len )
1104                 c2 = 63 & ( d->str[i+2] - 32 );
1105             if ( i+3 < d->len )
1106                 c3 = 63 & ( d->str[i+3] - 32 );
1107             i += 4;
1108             if ( linelength > 0 ) {
1109                 r.append( ( (c0 << 2) | (c1 >> 4) ) & 255 );
1110                 linelength--;
1111             }
1112             if ( linelength > 0 ) {
1113                 r.append( ( (c1 << 4) | (c2 >> 2) ) & 255 );
1114                 linelength--;
1115             }
1116             if ( linelength > 0 ) {
1117                 r.append( ( (c2 << 6) | (c3     ) ) & 255 );
1118                 linelength--;
1119             }
1120         }
1121     }
1122     // we ran off the end without seeing an end line. what to do?
1123     // return what we've seen so far?
1124     return r;
1125 }
1126 
1127 
1128 
// Lookup table used by EString::de64(), indexed by a 7-bit character
// code. Values 0-63 are the six-bit value of a base-64 alphabet
// character. 64 marks end of data (character 0 and '='), 65 is used
// for characters 8, 10 and 13, and 99 for everything else. de64()
// stops at 64 and skips both 65 and 99.
static char from64[128] =
{
    64, 99, 99, 99,  99, 99, 99, 99,
    65, 99, 65, 99,  99, 65, 99, 99,
    99, 99, 99, 99,  99, 99, 99, 99,
    99, 99, 99, 99,  99, 99, 99, 99,

        // 32
    99, 99, 99, 99,  99, 99, 99, 99,
    99, 99, 99, 62,  99, 99, 99, 63,
    52, 53, 54, 55,  56, 57, 58, 59,
    60, 61, 99, 99,  99, 64, 99, 99,

        // 64
    99,  0,  1,  2,   3,  4,  5,  6,
     7,  8,  9, 10,  11, 12, 13, 14,
    15, 16, 17, 18,  19, 20, 21, 22,
    23, 24, 25, 99,  99, 99, 99, 99,

        // 96
    99, 26, 27, 28,  29, 30, 31, 32,
    33, 34, 35, 36,  37, 38, 39, 40,
    41, 42, 43, 44,  45, 46, 47, 48,
    49, 50, 51, 99,  99, 99, 99, 99
};
1154 
1155 
1156 
1157 /*! Decodes this string using the base-64 algorithm and returns the result. */
1158 
de64() const1159 EString EString::de64() const
1160 {
1161     // this code comes from mailchen, adapted for EString.
1162     EString result;
1163     result.reserve( length() * 3 / 4 + 20 ); // 20 = fudge
1164     EString body;
1165     uint bp = 0;
1166     uint decoded = 0;
1167     int m = 0;
1168     uint p = 0;
1169     bool done = false;
1170     while ( p < length() && !done ) {
1171         uint c = d->str[p++];
1172         if ( c <= 'z' )
1173             c = from64[c];
1174         if ( c < 64 ) {
1175             switch ( m ) {
1176             case 0:
1177                 decoded = c << 2;
1178                 break;
1179             case 1:
1180                 decoded += ( (c & 0xf0) >> 4 );
1181                 result.d->str[bp++] = decoded;
1182                 decoded = (c & 15) << 4;
1183                 break;
1184             case 2:
1185                 decoded += ( (c & 0xfc) >> 2 );
1186                 result.d->str[bp++] = decoded;
1187                 decoded = (c & 3) << 6;
1188                 break;
1189             case 3:
1190                 decoded += c;
1191                 result.d->str[bp++] = decoded;
1192                 break;
1193             }
1194             m = (m+1)&3;
1195         }
1196         else if ( c == 64 ) {
1197             done = true;
1198         }
1199         else if ( c == 65 ) {
1200             // white space; perfectly normal and may be ignored.
1201         }
1202         else {
1203             // we're supposed to ignore all other characters. so
1204             // that's what we do, even though it may not be ideal in
1205             // all cases... consider that later.
1206         }
1207     }
1208     result.d->len = bp;
1209     return result;
1210 }
1211 
1212 
// The base-64 alphabet: to64[n] is the character that encodes the
// six-bit value n. Used by EString::e64().
static char to64[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1215 
1216 /*! Encodes this string using the base-64 algorithm and returns the
1217     result in lines of at most \a lineLength characters. If \a
1218     lineLength is not supplied, e64() returns a single line devoid of
1219     whitespace.
1220 */
1221 
e64(uint lineLength) const1222 EString EString::e64( uint lineLength ) const
1223 {
1224     // this code comes from mailchen, adapted for EString
1225     int l = length();
1226     int i = 0;
1227     EString r;
1228     r.reserve( l*2 );
1229     int p = 0;
1230     uint c = 0;
1231     while ( i <= l-3 ) {
1232         r.d->str[p++] = to64[ ((d->str[i]>>2))&63 ];
1233         r.d->str[p++] = to64[ ((d->str[i]<<4)&48) + ((d->str[i+1]>>4)&15) ];
1234         r.d->str[p++] = to64[ ((d->str[i+1]<<2)&60) + ((d->str[i+2]>>6)&3) ];
1235         r.d->str[p++] = to64[ (d->str[i+2]&63) ];
1236         i += 3;
1237         c += 4;
1238         if ( lineLength > 0 && c >= lineLength ) {
1239             r.d->str[p++] = 13;
1240             r.d->str[p++] = 10;
1241             c = 0;
1242         }
1243     }
1244     if ( i < l ) {
1245         int i0, i1, i2;
1246         i0 = d->str[i];
1247         i1 = i+1 < l ? d->str[i+1] : 0;
1248         i2 = i+2 < l ? d->str[i+2] : 0;
1249         r.d->str[p++] = to64[ ((i0>>2))&63 ];
1250         r.d->str[p++] = to64[ ((i0<<4)&48) + ((i1>>4)&15) ];
1251         if ( i+1 < l )
1252             r.d->str[p++] = to64[ ((i1<<2)&60) + ((i2>>6)&3) ];
1253         else
1254             r.d->str[p++] = '=';
1255         if ( i+2 < l )
1256             r.d->str[p++] = to64[ (i2&63) ];
1257         else
1258             r.d->str[p++] = '=';
1259     }
1260     if ( lineLength > 0 && c > 0 ) {
1261         r.d->str[p++] = 13;
1262         r.d->str[p++] = 10;
1263     }
1264     r.d->len = p;
1265     return r;
1266 }
1267 
1268 
1269 /*! Decodes this string according to the quoted-printable algorithm,
1270     and returns the result. Errors are overlooked, to cope with all
1271     the mail-munging brokenware in the great big world.
1272 
1273     If \a underscore is true, underscores in the input are translated
1274     into spaces (as specified in RFC 2047).
1275 */
1276 
deQP(bool underscore) const1277 EString EString::deQP( bool underscore ) const
1278 {
1279     uint i = 0;
1280     EString r;
1281     r.reserve( length() );
1282     while ( i < length() ) {
1283         if ( d->str[i] != '=' ) {
1284             char c = d->str[i++];
1285             if ( underscore && c == '_' )
1286                 c = ' ';
1287             r.d->str[r.d->len++] = c;
1288         }
1289         else {
1290             // are we looking at = followed by end-of-line?
1291             bool ok = false;
1292             uint c = 0;
1293             bool eol = false;
1294             uint j = i+1;
1295             // skip possibly appended whitespace first
1296             while ( j < length() &&
1297                     ( d->str[j] == ' ' || d->str[j] == '\t' ) )
1298                 j++;
1299             // there are two types of soft EOLs:
1300             if ( j < d->len && d->str[j] == 10 ) {
1301                 eol = true;
1302                 j++;
1303             }
1304             else if ( j < d->len-1 && d->str[j] == 13 && d->str[j+1] == 10 ) {
1305                 eol = true;
1306                 j = j + 2;
1307             }
1308             else if ( i + 2 < d->len ) {
1309                 // ... and one common case: a two-digit hex number, not EOL
1310                 c = mid( i+1, 2 ).number( &ok, 16 );
1311             }
1312 
1313             // write the proper decoded string and increase i.
1314             if ( eol ) { // ... if it's a soft EOL
1315                 i = j;
1316             }
1317             else if ( ok ) { // ... or if it's a two-digit hex number
1318                 r.d->str[r.d->len++] = c;
1319                 i = i + 3;
1320             }
1321             else { // ... or if it's an error... we overlook it
1322                 r.d->str[r.d->len++] = d->str[i++];
1323             }
1324         }
1325     }
1326     return r;
1327 }
1328 
1329 
// The upper-case hex digits used by EString::eQP() when it writes a
// byte as "=XX".
static char qphexdigits[17] = "0123456789ABCDEF";
1331 
1332 
maybeBoundary(const EString & s,uint i)1333 static bool maybeBoundary( const EString & s, uint i ) {
1334     if ( s.length() < i + 2 )
1335         return false;
1336     if ( s[i] != '-' || s[i+1] != '-' )
1337         return false;
1338 
1339     while ( i < s.length() && s[i] >= ' ' ) {
1340         //bchars := bcharsnospace / " "
1341         //bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
1342         //                 "+" / "_" / "," / "-" / "." /
1343         //                 "/" / ":" / "=" / "?"
1344         switch( s[i] ) {
1345         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1346         case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1347         case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1348         case 'v': case 'w': case 'x': case 'y': case 'z':
1349         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1350         case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1351         case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1352         case 'V': case 'W': case 'X': case 'Y': case 'Z':
1353         case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1354         case '7': case '8': case '9':
1355         case '\'':
1356         case '(': case ')': case '+': case '_': case ',': case '-': case '.':
1357         case '/': case ':': case '=': case '?':
1358         case ' ':
1359             // ok
1360             break;
1361         default:
1362             return false;
1363         }
1364         ++i;
1365     }
1366     return true;
1367 }
1368 
1369 
1370 /*! Encodes this string using the quoted-printable algorithm and
1371     returns the encoded version. In the encoded version, all line
1372     feeds are CRLF, and soft line feeds are positioned so that the q-p
1373     looks as good as it can.
1374 
1375     Note that this function is slightly incompatible with RFC 2646: It
1376     encodes trailing spaces, as suggested in RFC 2045, but RFC 2646
1377     suggest that if trailing spaces are the only reason to q-p, then
1378     the message should not be encoded.
1379 
1380     If \a underscore is present and true, this function uses the variant
1381     of q-p specified by RFC 2047, where a space is encoded as an
1382     underscore and a few more characters need to be encoded.
1383 
1384     If \a from is present and true, this function also makes sure that
1385     no output line starts with "From " or looks like a MIME boundary.
1386 */
1387 
eQP(bool underscore,bool from) const1388 EString EString::eQP( bool underscore, bool from ) const
1389 {
1390     if ( isEmpty() )
1391         return *this;
1392     uint i = 0;
1393     EString r;
1394     // no input character can use more than six output characters (=
1395     // CR LF = 3 D), so we allocate as much space as we could possibly
1396     // need.
1397     r.reserve( length()*6 );
1398     uint c = 0;
1399     while ( i < d->len ) {
1400         if ( d->str[i] == 10 ||
1401              ( i < d->len-1 && d->str[i] == 13 && d->str[i+1] == 10 ) ) {
1402             // we have a line feed. if the last character on the line
1403             // was a space, we need to quote that to protect it.
1404             if ( r.d->len > 0 && r.d->str[r.d->len-1] == ' ' ) {
1405                 r.d->str[r.d->len-1] = '=';
1406                 r.d->str[r.d->len++] = '2';
1407                 r.d->str[r.d->len++] = '0';
1408             }
1409             c = 0;
1410             if ( d->str[i] == 13 )
1411                 r.d->str[r.d->len++] = d->str[i++];
1412             r.d->str[r.d->len++] = 10;
1413             // worst case: five bytes
1414         }
1415         else {
1416             if ( c > 72 ) {
1417                 uint j = 1;
1418                 while ( j < 10 && r.d->str[r.d->len-j] != ' ' )
1419                     j++;
1420                 if ( j >= 10 )
1421                     j = 0;
1422                 else
1423                     j--;
1424                 uint k = 1;
1425                 while ( k <= j ) {
1426                     r.d->str[r.d->len - k + 3] = r.d->str[r.d->len - k];
1427                     k++;
1428                 }
1429                 // always CRLF for soft linefeed
1430                 r.d->str[r.d->len++ - j] = '=';
1431                 r.d->str[r.d->len++ - j] = 13;
1432                 r.d->str[r.d->len++ - j] = 10;
1433                 c = j;
1434             }
1435 
1436             if ( underscore && d->str[i] == ' ' ) {
1437                 r.d->str[r.d->len++] = '_';
1438                 c += 1;
1439             }
1440             else if ( underscore &&
1441                       ! ( ( d->str[i] >= '0' && d->str[i] <= '9' ) ||
1442                           ( d->str[i] >= 'a' && d->str[i] <= 'z' ) ||
1443                           ( d->str[i] >= 'A' && d->str[i] <= 'Z' ) ) ) {
1444                 r.d->str[r.d->len++] = '=';
1445                 r.d->str[r.d->len++] = qphexdigits[d->str[i]/16];
1446                 r.d->str[r.d->len++] = qphexdigits[d->str[i]%16];
1447                 c += 3;
1448             }
1449             else if ( from && c == 0 && maybeBoundary( *this, i ) ) {
1450                 r.d->str[r.d->len++] = '=';
1451                 r.d->str[r.d->len++] = qphexdigits[d->str[i]/16];
1452                 r.d->str[r.d->len++] = qphexdigits[d->str[i]%16];
1453                 c += 3;
1454             }
1455             else if ( from && c == 0 && d->len >= i + 4 &&
1456                       d->str[i] == 'F' && d->str[i+1] == 'r' &&
1457                       d->str[i+2] == 'o' && d->str[i+3] == 'm' &&
1458                       d->str[i+4] == ' ' ) {
1459                 r.d->str[r.d->len++] = '=';
1460                 r.d->str[r.d->len++] = qphexdigits[d->str[i]/16];
1461                 r.d->str[r.d->len++] = qphexdigits[d->str[i]%16];
1462                 c += 3;
1463             }
1464             else if ( ( d->str[i] >= ' ' && d->str[i] < 127 &&
1465                         d->str[i] != '=' ) ||
1466                       ( d->str[i] == '\t' ) ) {
1467                 r.d->str[r.d->len++] = d->str[i];
1468                 c++;
1469             }
1470             else {
1471                 r.d->str[r.d->len++] = '=';
1472                 r.d->str[r.d->len++] = qphexdigits[d->str[i]/16];
1473                 r.d->str[r.d->len++] = qphexdigits[d->str[i]%16];
1474                 c += 3;
1475             }
1476         }
1477         i++;
1478     }
1479     return r;
1480 }
1481 
1482 
1483 /*! This function returns true if the string would need to be encoded
1484     using quoted-printable. It is a greatly simplified copy of eQP(),
1485     with the changes made necessary by RFC 2646.
1486 */
1487 
needsQP() const1488 bool EString::needsQP() const
1489 {
1490     uint i = 0;
1491     uint c = 0;
1492     while ( i < length() ) {
1493         if ( c == 0 && maybeBoundary( *this, i ) )
1494             return true;
1495         if ( c == 0 && d->str[i] == 'F' && d->str[i+1] == 'r' )
1496             return true;
1497         if ( d->str[i] == 10 )
1498              c = 0;
1499         else if ( c > 78 )
1500             return true;
1501         else if ( ( d->str[i] >= ' ' && d->str[i] < 127 ) ||
1502                   ( d->str[i] == '\t' ) ||
1503                   ( d->str[i] == 13 && d->str[i+1] == 10 ) )
1504             c++;
1505         else
1506             return true;
1507         i++;
1508     }
1509     return false;
1510 
1511 }
1512 
1513 
1514 /*! Returns -1 if this string is lexicographically before \a other, 0
1515     if they are the same, and 1 if this string is lexicographically
1516     after \a other.
1517 
1518     The comparison is case sensitive - just a byte comparison.
1519 */
1520 
compare(const EString & other) const1521 int EString::compare( const EString & other ) const
1522 {
1523     if ( d == other.d )
1524         return 0;
1525     uint i = 0;
1526     while ( i < length() && i < other.length() &&
1527             d->str[i] == other.d->str[i] )
1528         i++;
1529     if ( i >= length() && i >= other.length() )
1530         return 0;
1531     if ( i >= length() )
1532         return -1;
1533     if ( i >= other.length() )
1534         return 1;
1535     if ( d->str[i] < other.d->str[i] )
1536         return -1;
1537     return 1;
1538 }
1539 
1540 
operator <(const EString & other) const1541 bool EString::operator<( const EString & other ) const
1542 {
1543     return compare( other ) < 0;
1544 }
1545 
1546 
operator >(const EString & other) const1547 bool EString::operator>( const EString & other ) const
1548 {
1549     return compare( other ) > 0;
1550 }
1551 
1552 
operator <=(const EString & other) const1553 bool EString::operator<=( const EString & other ) const
1554 {
1555     return compare( other ) <= 0;
1556 }
1557 
1558 
operator >=(const EString & other) const1559 bool EString::operator>=( const EString & other ) const
1560 {
1561     return compare( other ) >= 0;
1562 }
1563 
1564 
operator <(const char * other) const1565 bool EString::operator<( const char * other ) const
1566 {
1567     if ( !other )
1568         return false;
1569 
1570     uint l = length();
1571     uint i = 0;
1572     while ( i < l && d->str[i] == other[i] && other[i] )
1573         i++;
1574     // four cases: we ran out of characters, other did, both did,
1575     // there's a difference
1576     if ( i == l && !other[i] )
1577         return false;
1578     if ( !other[i] )
1579         return false;
1580     if ( i == l )
1581         return true;
1582     return d->str[i] < other[i];
1583 }
1584 
1585 
1586 /*! This function is a debugging aid. It prints the contents of the
1587     string within single quotes followed by a trailing newline to
1588     stderr.
1589 */
1590 
print() const1591 void EString::print() const
1592 {
1593     uint i = 0;
1594 
1595     fprintf( stderr, "'" );
1596     while ( i < length() )
1597         fprintf( stderr, "%c", d->str[i++] );
1598     fprintf( stderr, "'\n" );
1599 }
1600 
1601 
1602 /*! Returns \a n as a string representing that number in a
1603     human-readable fashion optionally suffixed by K, M, G or T.
1604 
1605     The number is rounded more or less correctly.
1606 */
1607 
humanNumber(int64 n)1608 EString EString::humanNumber( int64 n )
1609 {
1610     if ( n < 1024 )
1611         return fromNumber( n );
1612 
1613     int64 f = 1024;
1614     char s = 'K';
1615     if ( n < 1024 * 1024 ) {
1616         // ok
1617     }
1618     else if ( n < 1024 * 1024 * 1024 ) {
1619         f = 1024 * 1024;
1620         s = 'M';
1621     }
1622     else if ( n < 1024LL * 1024 * 1024 * 1024 ) {
1623         f = 1024 * 1024 * 1024;
1624         s = 'G';
1625     }
1626     else {
1627         // terabytes. we don't use petabytes or exabytes since people
1628         // don't know their abbreviations by heart.
1629         f = 1024LL * 1024 * 1024 * 1024;
1630         s = 'T';
1631     }
1632 
1633     EString r;
1634     // if it's single-digit, we add a decimal point. since we only go
1635     // to TB, not petabyte or exabyte, we don't need to check for
1636     // INT64_MAX/10. (actually we'd only need that check for exabytes.)
1637     if ( n < f * 10 ) {
1638         n += f/20-1;
1639         r = fromNumber( n/f );
1640         uint m = (n%f)/(f/10);
1641         r.append( '.' );
1642         r.append( '0' + m );
1643     }
1644     else {
1645         n += f/2-1;
1646         r = fromNumber( n/f );
1647     }
1648     r.append( s );
1649     return r;
1650 }
1651 
1652 
// all the keywords we know about, found by grepping through
// message/*.cpp. all are lower-case, and the list ends with a null
// pointer. EString::anonymised() leaves words on this list alone. a
// few entries ("fri", "mon", "sep", "us-ascii") occur twice, which
// is harmless for a linear search.
static const char * keywords[] = {
    "7bit", "8bit", "alternative", "aug", "binary", "bcc", "cc", "comments",
    "content-description", "content-disposition", "content-id",
    "content-language", "content-location", "content-md5",
    "content-transfer-encoding", "content-type", "date", "fri", "from",
    "in-reply-to", "jun", "jul", "keywords", "may", "message-id",
    "mime-version", "mon", "orig-date",
    "received", "references", "reply-to", "resent-bcc", "resent-cc",
    "resent-date", "resent-from", "resent-message-id", "resent-sender",
    "resent-to", "return-path", "sender", "sep", "subject", "to",
    "us-ascii", "adt", "akdt", "akst", "apr", "ast", "attachment",
    "base64", "body", "boundary", "brt", "bst", "bytes", "cadt", "cast",
    "cct", "cdt", "ces", "cest", "cet", "charset", "cst", "cut", "data",
    "dec", "deleted", "digest", "eadt", "east", "edt", "eet", "est",
    "feb", "flag", "fri", "gmt", "grnlnddt", "grnlndst", "hadt", "hast",
    "helo", "hkt", "hst", "html", "id", "idate", "inline", "jan", "jst",
    "kdt", "kst", "lhlo", "lines", "lockuidnext", "mar", "mdt", "message",
    "mest", "mesz", "met", "metdst", "mez", "mezt", "mon", "msd", "msk",
    "mst", "multipart", "name", "ndt", "nov", "nst", "nzdt", "nzst", "oct",
    "part", "plain", "pdt", "pst", "quit", "quoted-printable", "rawbytes",
    "rfc822", "rfc822size", "root", "sast", "sat", "seen", "sep",
    "supplied", "text", "tue", "uid", "us-ascii", "ut", "utc", "value",
    "wadt", "wast", "wed", "wet", "ydt", "yst",
    "mixed",
    0
};
1680 
// helper for EString::anonymised(): returns true if c is an ASCII
// letter, a digit, or one of = " : ? - ( ) _ and false otherwise.
static inline bool isMungableChar( char c ) {
    switch ( c ) {
    case '=': case '"': case ':': case '?':
    case '-': case '(': case ')': case '_':
        return true;
    default:
        break;
    }
    if ( c >= '0' && c <= '9' )
        return true;
    if ( c >= 'a' && c <= 'z' )
        return true;
    return c >= 'A' && c <= 'Z';
}
1697 
1698 
/*! Returns a copy of this string where the letters of most words
    have been replaced with other letters ('a' for the first
    character of the word, 'b' for the second and so on), but if the
    message was an RFC 822 message, it keeps the same parse tree.

    Specifically, most ASCII words are changed, while most/all
    syntax elements are kept: header field names, words containing
    '"' or '=', boundary lines, known keywords and words containing
    non-ASCII bytes are copied unchanged.

    This function is very, very slow. That's okay since it's only used
    for sending bug reports to us, and we all know, that's not a common
    case.
*/

EString EString::anonymised() const
{
    uint b = 0;
    EString r;
    while ( b < length() ) {
        // b is the start of the next word and e will be its end.
        // NOTE(review): d->str[e] > 127 (and < 128 below) can only
        // tell bytes apart if char is unsigned on this platform;
        // confirm the build guarantees that.
        uint e = b;
        while ( e < d->len && ( d->str[e] > 127 ||
                                isMungableChar( d->str[e] ) ) )
            e++;
        // we have a word.
        bool munge = true;
        if ( e == b ) // ... or rather, an empty one
            munge = false;

        if ( munge && d->str[e-1] == ':' ) // header field names
            munge = false;

        if ( munge ) { // mime parameters
            // anything containing a double quote or an equals sign
            // is left alone.
            uint i = b;
            while ( i < e && d->str[i] != '"' && d->str[i] != '=' )
                i++;
            if ( i < e )
                munge = false;
        }

        if ( munge && // boundary lines
             b + 2 <= e &&
             d->str[b] == '-' && d->str[b+1] == '-' ) {
            munge = false;
        }

        if ( munge ) { // any keyword
            // linear search through the null-terminated keyword list
            EString m = mid( b, e-b ).lower();
            uint i = 0;
            while ( keywords[i] && m != keywords[i] )
                i++;
            if ( keywords[i] )
                munge = false;
        }

        if ( munge ) { // any word containing non-ascii
            uint i = b;
            while ( i < e && d->str[i] < 128 )
                i++;
            if ( i < e )
                munge = false;
        }

        if ( munge ) {
            // replace each letter with one that depends only on its
            // position in the word; digits and punctuation are kept.
            uint i = 0;
            while ( b + i < e ) {
                char c = d->str[b+i];
                if ( c >= 'a' && c <= 'z' )
                    r.append( 'a' + (i%26) );
                else if ( c >= 'A' && c <= 'Z' )
                    r.append( 'a' + (i%26) );
                else
                    r.append( c );
                i++;
            }
        }
        else {
            r.append( mid( b, e-b ) );
        }
        b = e;

        // copy whatever separates this word from the next.
        while ( b < d->len && !isMungableChar( d->str[b] ) ) {
            r.append( d->str[b] );
            b++;
        }
    }

    return r;
}
1785 
1786 
1787 
1788 /*! Returns a copy of this string where every linefeed is CRLF, and
1789     where the last two characters are CRLF.
1790 */
1791 
crlf() const1792 EString EString::crlf() const
1793 {
1794     bool copy = true;
1795     if ( length() < 2 ||
1796          d->str[d->len-1] != 10 ||
1797          d->str[d->len-2] != 13 )
1798         copy = false;
1799     uint i = 0;
1800     while ( copy && i < d->len ) {
1801         if ( d->str[i] == 13 && i < d->len && d->str[i+1] == 10 )
1802             i += 2;
1803         else if ( d->str[i] == 13 || d->str[i] == 10 )
1804             copy = false;
1805         else
1806             i++;
1807     }
1808     if ( copy )
1809         return *this;
1810 
1811     EString r;
1812     r.reserve( length() );
1813     r.append( mid( 0, i ) );
1814     bool lf = false;
1815     uint len = 0;
1816     if ( d )
1817         len = d->len;
1818     while ( i < len ) {
1819         lf = false;
1820         char c = d->str[i++];
1821 
1822         if ( c == 10 ) {
1823             lf = true;
1824         }
1825         else if ( c == 13 ) {
1826             lf = true;
1827             if ( i < d->len && d->str[i] == 10 )
1828                 i++;
1829             else if ( i < d->len-1 &&
1830                       d->str[i] == 13 && d->str[i+1] == 10 )
1831                 i += 2;
1832         }
1833 
1834         if ( lf )
1835             r.append( "\r\n" );
1836         else
1837             r.append( c );
1838     }
1839     if ( !lf )
1840         r.append( "\r\n" );
1841 
1842     return r;
1843 }
1844 
1845 
1846 /*! Returns true if this string contains at least one instance of \a s. */
1847 
contains(const EString & s) const1848 bool EString::contains( const EString & s ) const
1849 {
1850     if ( find( s ) >= 0 )
1851         return true;
1852     return false;
1853 }
1854 
1855 
1856 /*! Returns true if this string contains at least one instance of \a c. */
1857 
contains(const char c) const1858 bool EString::contains( const char c ) const
1859 {
1860     if ( find( c ) >= 0 )
1861         return true;
1862     return false;
1863 }
1864 
1865 
1866 /*! Returns true if this string contains at least one instance of \a
1867     s, and the characters before and after the occurence aren't
1868     letters.
1869 */
1870 
containsWord(const EString & s) const1871 bool EString::containsWord( const EString & s ) const
1872 {
1873     int i = find( s );
1874     while ( i >= 0 ) {
1875         bool before = false;
1876         bool after = false;
1877         if ( i == 0 ) {
1878             before = true;
1879         }
1880         else {
1881             char c = d->str[i-1];
1882             if ( c < 'A' || ( c > 'Z' && c < 'a' ) || c > 'z' )
1883                 before = true;
1884         }
1885         if ( i + s.length() == length() ) {
1886             after = true;
1887         }
1888         else {
1889             char c = d->str[i+s.length()];
1890             if ( c < 'A' || ( c > 'Z' && c < 'a' ) || c > 'z' )
1891                 after = true;
1892         }
1893         if ( before && after )
1894             return true;
1895         i = find( s, i+1 );
1896     }
1897     return false;
1898 }
1899 
1900 
/*! Returns a copy of this string wrapped so that each line contains
    at most \a linelength characters. The first line is prefixed by \a
    firstPrefix, subsequent lines by \a otherPrefix. If \a spaceAtEOL
    is true, all lines except the last end with a space.

    The prefixes are counted towards line length, but the optional
    trailing space is not.

    Only space (ASCII 32) is a line-break opportunity. If there are
    multiple spaces where a line is broken, all the spaces are
    replaced by a single CRLF. Linefeeds added use CRLF.
*/

EString EString::wrapped( uint linelength,
                        const EString & firstPrefix, const EString & otherPrefix,
                        bool spaceAtEOL ) const
{
    // result must be modifiable() at all times, otherwise we allocate
    // all the RAM.

    // working:
    EString result;
    result.reserve( length() );
    result.append( firstPrefix );
    // broken but should work. needs investigation.
    // EString result = firstPrefix;
    // result.reserve( length() );

    // move is where we keep the text that has to be moved to the next
    // line. it too should be modifiable() all the time.
    EString move;
    uint i = 0;
    // linestart and space are offsets into result: where the current
    // line is considered to start, and where the most recent space
    // was appended.
    uint linestart = 0;
    uint space = 0;
    while ( i < length() ) {
        char c = at( i );
        if ( c == ' ' )
            space = result.length();
        else if ( c == '\n' )
            linestart = result.length() + 1;
        result.append( c );
        i++;
        // add a soft linebreak?
        if ( result.length() > linestart + linelength && space > linestart ) {
            // back up to the first of a run of spaces...
            while ( space > 0 && result[space-1] == ' ' )
                space--;
            // ... and find the first non-space after the run.
            linestart = space + 1;
            while ( result[linestart] == ' ' )
                linestart++;
            // save what follows the spaces, cut result off at the
            // break, and then put the saved text on a new line.
            // NOTE(review): cstr() is presumably null-terminated, so
            // a null byte in the moved text would cut it short;
            // confirm.
            move.truncate();
            if ( result.length() > linestart )
                move.append( result.cstr() + linestart );
            if ( spaceAtEOL )
                result.truncate( space + 1 );
            else
                result.truncate( space );
            result.append( "\r\n" );
            result.append( otherPrefix );
            result.append( move );
        }
    }
    return result;
}
1964 
1965 
1966 /*! Replaces all occurences of \a a in this string with \a b. Rather
1967     slow and allocates much memory. Could be optimised if it ever
1968     shows up on the performance graphs.
1969 
1970     \a a must not be empty.
1971 
1972     Replaced sections are not considered when looking for the next
1973     match.
1974 */
1975 
replace(const EString & a,const EString & b)1976 void EString::replace( const EString & a, const EString & b )
1977 {
1978     if ( a == b)
1979         return; // noop
1980     if ( a.isEmpty() )
1981         return; // infinite loop
1982 
1983     int i = find( a );
1984     while ( i >= 0 ) {
1985         EString r = mid( i+a.length() );
1986         truncate( i );
1987         append( b );
1988         append( r );
1989         i = find( a, i + b.length() );
1990     }
1991 }
1992