1 // Copyright 2009 The Archiveopteryx Developers <info@aox.org>
2
3 #include "estring.h"
4
5 #include "allocator.h"
6
7 // stderr, fprintf
8 #include <stdio.h>
9 // strlen
10 #include <string.h>
11
12
13 /*! \class EStringData estring.h
14
15 This private helper class contains the actual string data. It has
three fields, all accessible only to EString. The only noteworthy
17 field is max, which is 0 in the case of a shared/read-only string,
18 and nonzero in the case of a string which can be modified.
19 */
20
21
22 /*! \fn EStringData::EStringData()
23
24 Creates a zero-length string. This is naturally read-only.
25 */
26
27 /*! Creates a new EString with \a bytes capacity. */
28
EStringData(int bytes)29 EStringData::EStringData( int bytes )
30 : str( 0 ), len( 0 ), max( bytes )
31 {
32 if ( str )
33 str = (char*)Allocator::alloc( max, 0 );
34 }
35
36
operator new(size_t ownSize,uint extra)37 void * EStringData::operator new( size_t ownSize, uint extra )
38 {
39 return Allocator::alloc( ownSize + extra, 1 );
40 }
41
42
43 /*! \class EString estring.h
44 An email-oriented 8-bit string class.
45
46 The string data are counted, so null bytes are allowed, and most
47 operations are very fast.
48
49 The data structure uses a simplified variant of reference counting,
50 where only "one" and "many" are possible. The detach() function
51 ensures that the count is "one" afterwards. Many functions leave
52 the count on "many", even ones such as mid().
53
54 The usual string functions are implemented, along with a variety
55 of email-specific operations such as eQP(), deQP(), needsQP(),
56 e64(). boring() returns true if the string can be used unquoted in
57 e.g. MIME, quoted() quotes it. upper() and lower() have a third
58 sibling, headerCased(). simplified() and trimmed() remove white
59 space in ways email often needs.
60
61 Several people have asked why we use this class instead of
62 std::string, and UString instead of std::basic_string<uint>. At
63 least one told us we were wrong and wheel reinvention and so on.
64
65 The reason is that working with email using EString is fairly
66 pleasant. Doing the same kind of work with std::string is a pain.
67
68 Both wheels are round. EString carries its load and moves it
69 smoothly along.
70 */
71
72 /*! Creates an empty EString */
73
EString()74 EString::EString()
75 : d( 0 )
76 {
77 }
78
79 /*! Creates a EString from the NUL-terminated string \a s.
80 The NUL is not copied. */
81
EString(const char * s)82 EString::EString( const char *s )
83 : d( 0 )
84 {
85 *this = s;
86 }
87
88 /*! Creates a EString from the first \a n bytes of \a s, which may
89 contain NULs. */
90
EString(const char * s,uint n)91 EString::EString( const char *s, uint n )
92 : d( 0 )
93 {
94 append( s, n );
95 }
96
97
98 /*! Creates a copy of \a s. */
99
EString(const EString & s)100 EString::EString( const EString &s )
101 : Garbage(), d( 0 )
102 {
103 *this = s;
104 }
105
106
107 /*! \fn void EString::detach()
108
109 Ensures that the string is modifiable. All EString functions call
110 this prior to modifying the string. */
111
112
113 /*! Destroys the string.
114
115 Because EString is used so much, and can eat up such vast amounts
116 of memory so quickly, this destructor does something: If the
117 string is the sole owner of its data, it frees them.
118
119 As of April 2005, the return values of data() or cstr() are NO
120 LONGER valid after a string has gone out of scope or otherwise been
121 lost.
122 */
123
~EString()124 EString::~EString()
125 {
126 if ( d && d->max )
127 Allocator::dealloc( d );
128 d = 0;
129 }
130
131
132 /*! Deletes \a p. (This function exists only so that gcc -O3 doesn't
133 decide that EString objects don't need destruction.)
134 */
135
operator delete(void * p)136 void EString::operator delete( void *p )
137 {
138 EStringData * & d = ((EString *)p)->d;
139 if ( d && d->max )
140 Allocator::dealloc( d );
141 d = 0;
142 }
143
144
145 /*! Copies \a other to this string and returns a reference to this
146 string. */
147
operator =(const EString & other)148 EString & EString::operator=( const EString & other )
149 {
150 d = other.d;
151 if ( d )
152 d->max = 0;
153 return *this;
154 }
155
156
157 /*! Copies \a s to this string and returns a reference to this
158 string. If \a s is a null pointer, the result is an empty string. */
159
operator =(const char * s)160 EString & EString::operator=( const char * s )
161 {
162 if ( !s || !*s ) {
163 d = 0;
164 return *this;
165 }
166
167 uint len = strlen( s );
168 if ( d && d->max )
169 d->len = 0;
170 reserve( len );
171 d->len = len;
172 memmove( d->str, s, d->len );
173 return *this;
174 }
175
176
177 /*! \fn uint EString::length() const
178
179 Returns the length of the string. The length does not include any
180 terminator or padding. */
181
182
183 /*! \fn uint EString::capacity() const
184
185 Returns the capacity of the string variable, that is, how long the
186 string can be before it has to allocate memory.
187 */
188
189
190 /*! \fn const char *EString::data() const
191
192 Returns a pointer to the string's byte representation, which is
193 NOT necessarily zero-terminated. */
194
195
196 /*! Returns the zero-terminated byte representation of the
197 string. Note that even though the return value is zero-terminated,
198 it can also contain null bytes in the middle.
199
200 Even though this function modifies memory, it doesn't detach(),
201 since it doesn't modify the string. However, in most cases its
202 call to reserve() causes a detach().
203 */
204
cstr()205 const char * EString::cstr()
206 {
207 reserve( length()+1 );
208 d->str[d->len] = '\0';
209 return data();
210 }
211
212
213 /*! This const version of cstr() is the same as the non-const version
214 above. The only difference is that it can be called on a const
215 object, and that it may cause some memory allocation elsewhere.
216 */
217
cstr() const218 const char * EString::cstr() const
219 {
220 if ( d && d->max > d->len ) {
221 d->str[d->len] = '\0';
222 return data();
223 }
224 EString tmp;
225 tmp.reserve( length() + 1 );
226 tmp = *this;
227 return tmp.cstr();
228 }
229
230
231 /*! Returns a copy of this string where all upper-case letters (A-Z -
232 this is ASCII only) have been changed to lower case. */
233
lower() const234 EString EString::lower() const
235 {
236 EString result( *this );
237 uint i = 0;
238 while ( i < result.length() ) {
239 if ( result[i] >= 'A' && result[i] <= 'Z' ) {
240 result.detach();
241 result.d->str[i] = result.d->str[i] + 32;
242 }
243 i++;
244 }
245 return result;
246 }
247
248
249 /*! Returns a copy of this string where all lower-case letters (a-z -
250 this is ASCII only) have been changed to upper case. */
251
upper() const252 EString EString::upper() const
253 {
254 EString result( *this );
255 uint i = 0;
256 while ( i < result.length() ) {
257 if ( result[i] >= 'a' && result[i] <= 'z' ) {
258 result.detach();
259 result.d->str[i] = result.d->str[i] - 32;
260 }
261 i++;
262 }
263 return result;
264 }
265
266
267 /*! Returns a copy of this string where all letters have been changed
268 to conform to typical mail header practice: Letters following digits
269 and other letters are lower-cased. Other letters are upper-cased
270 (notably including the very first character). */
271
headerCased() const272 EString EString::headerCased() const
273 {
274 EString result( *this );
275 uint i = 0;
276 bool u = true;
277 while ( i < length() ) {
278 if ( u && result[i] >= 'a' && result[i] <= 'z' ) {
279 result.detach();
280 result.d->str[i] = result[i] - 32;
281 }
282 else if ( !u && result[i] >= 'A' && result[i] <= 'Z' ) {
283 result.detach();
284 result.d->str[i] = result[i] + 32;
285 }
286 if ( ( result[i] >= 'A' && result[i] <= 'Z' ) ||
287 ( result[i] >= 'a' && result[i] <= 'z' ) ||
288 ( result[i] >= '0' && result[i] <= '9' ) )
289 u = false;
290 else
291 u = true;
292 i++;
293 }
294 return result;
295 }
296
297
298
299
300
301 /*! Returns the position of the first occurence of \a c on or after \a i
302 in this string, or -1 if there is none.
303 */
304
find(char c,int i) const305 int EString::find( char c, int i ) const
306 {
307 while ( i < (int)length() && at( i ) != c )
308 i++;
309 if ( i < (int)length() )
310 return i;
311 return -1;
312 }
313
314
315 /*! Returns the position of the first occurence of \a s on or after \a i
316 in this string, or -1 if there is none.
317 */
318
find(const EString & s,int i) const319 int EString::find( const EString & s, int i ) const
320 {
321 uint j = 0;
322 while ( j < s.length() && i+j < length() ) {
323 if ( d->str[i+j] == s.d->str[j] ) {
324 j++;
325 }
326 else {
327 j = 0;
328 i++;
329 }
330 }
331 if ( j == s.length() )
332 return i;
333 return -1;
334 }
335
336
337 /*! Returns section \a n of this string, where a section is defined as
338 a run of sequences separated by \a s. If \a s is the empty string
339 or \a n is 0, section() returns this entire string. If this string
340 contains fewer instances of \a s than \a n (ie. section \a n is
341 after the end of the string), section returns an empty string.
342 */
343
section(const EString & s,uint n) const344 EString EString::section( const EString & s, uint n ) const
345 {
346 if ( s.isEmpty() || n == 0 )
347 return *this;
348
349 int b = 0;
350 while ( n && b <= (int)length() ) {
351 int e = find( s, b );
352 if ( e < 0 )
353 e = length();
354 if ( n == 1 )
355 return mid( b, e - b );
356 n--;
357 b = e + s.length();
358 }
359 return "";
360 }
361
362
363 /*! Appends \a other to this string. */
364
append(const EString & other)365 void EString::append( const EString & other )
366 {
367 if ( !other.length() )
368 return;
369 if ( !length() && ( !modifiable() || d->max < other.length() ) ) {
370 // if this isn't modifiable, we just make a copy of the other
371 // string. only sensible thing to do. if it's modifiable, but
372 // we don't have enough bytes, we also just glue ourselves
373 // onto the other. maybe we'll need to copy later, but maybe
374 // not.
375 *this = other;
376 return;
377 }
378 reserve( length() + other.length() );
379 memmove( d->str+d->len, other.d->str, other.d->len );
380 d->len += other.d->len;
381 }
382
383
384 /*! \overload
385 This version of append() appends \a num raw bytes from memory
386 \a base. If \a base is null, this function does nothing.
387 */
388
append(const char * base,uint num)389 void EString::append( const char * base, uint num )
390 {
391 if ( !base || !num )
392 return;
393
394 reserve( length() + num );
395 memmove( d->str + d->len, base, num );
396 d->len += num;
397 }
398
399
400 /*! \overload
401
402 This version of append() appends the null-terminated string \a s,
403 or does nothing if \a s is null.
404 */
405
append(const char * s)406 void EString::append( const char * s )
407 {
408 if ( s )
409 append( s, strlen( s ) );
410 }
411
412
413 /*! \overload
414 This version of append() appends the single character \a c.
415 */
416
append(char c)417 void EString::append( char c )
418 {
419 reserve( length() + 1 );
420 d->str[d->len] = c;
421 d->len++;
422 }
423
424
425 /*! Prepends \a other to this string. */
426
prepend(const EString & other)427 void EString::prepend( const EString & other )
428 {
429 if ( other.isEmpty() )
430 return;
431 reserve( length() + other.length() );
432 memmove( d->str + other.length(), d->str, length() );
433 memmove( d->str, other.d->str, other.length() );
434 setLength( length() + other.length() );
435 }
436
437
appendNumber(int n,int base)438 void EString::appendNumber( int n, int base )
439 {
440 if ( n < 0 ) {
441 append( '-' );
442 n = -n;
443 }
444 appendNumber( (uint)n, base );
445 }
446
appendNumber(uint n,int base)447 void EString::appendNumber( uint n, int base )
448 {
449 EString s( fromNumber( n, base ) );
450
451 if ( n > 0 )
452 append( s );
453 else
454 append( "0" );
455 }
456
457 /*! Ensures that there is at least \a num bytes available in this
458 string. This implicitly causes the string to become modifiable and
459 have a nonzero number of available bytes.
460
461 After calling reserve(), capacity() is at least as large as \a
462 num, while length() has not changed.
463 */
464
reserve(uint num)465 void EString::reserve( uint num )
466 {
467 if ( num < length() )
468 num = length();
469 if ( !num )
470 num = 1;
471 if ( !d || d->max < num )
472 reserve2( num );
473 }
474
475 /*! Equivalent to reserve(). reserve( \a num ) calls this function to
476 do the heavy lifting. This function is not inline, while reserve()
477 is, and calls to this function should be interesting wrt. memory
478 allocation statistics.
479
480 Noone except reserve() should call reserve2().
481 */
482
reserve2(uint num)483 void EString::reserve2( uint num )
484 {
485 num = Allocator::rounded( num + sizeof( EStringData ) ) - sizeof( EStringData );
486
487 EStringData * freeable = 0;
488 if ( d && d->max )
489 freeable = d;
490
491 EStringData * nd = new( num ) EStringData( 0 );
492 nd->max = num;
493 nd->str = sizeof( EStringData ) + (char*)nd;
494 if ( d )
495 nd->len = d->len;
496 if ( nd->len > num )
497 nd->len = num;
498 if ( d && d->len )
499 memmove( nd->str, d->str, nd->len );
500 d = nd;
501
502 if ( freeable )
503 Allocator::dealloc( freeable );
504 }
505
506
507 /*! Ensures that the string's length is either \a l or length(),
508 whichever is smaller. If \a l is 0 (the default), the string will be
509 empty after the function is called.
510 */
511
truncate(uint l)512 void EString::truncate( uint l )
513 {
514 if ( !l ) {
515 if ( d && d->max )
516 Allocator::dealloc( d );
517 d = 0;
518 }
519 else if ( l < length() ) {
520 detach();
521 d->len = l;
522 }
523 }
524
525
526 /*! Ensures that the string's length is \a l. If \a l is 0, the string
527 will be empty after the function is called. If \a l is longer than
528 the string used to be, the new part is uninitialised.
529 */
530
setLength(uint l)531 void EString::setLength( uint l )
532 {
533 reserve( l );
534 if ( l )
535 d->len = d->max;
536 truncate( l );
537 }
538
539
540 /*! Returns a string containing the data starting at position \a start
541 of this string, extending for \a num bytes. \a num may be left out,
542 in which case the rest of the string is returned.
543
544 If \a start is too large, an empty string is returned.
545 */
546
mid(uint start,uint num) const547 EString EString::mid( uint start, uint num ) const
548 {
549 if ( !d )
550 num = 0;
551 else if ( num > d->len || start + num > d->len )
552 num = d->len - start;
553
554 EString result;
555 if ( !num || start >= length() )
556 return result;
557
558 d->max = 0;
559 result.d = new EStringData;
560 result.d->str = d->str + start;
561 result.d->len = num;
562 return result;
563 }
564
565
566 /*! Returns true is the string is quoted with \a c (default '"') as
567 quote character and \a q (default '\') as escape character. \a c
568 and \a q may be the same. */
569
isQuoted(char c,char q) const570 bool EString::isQuoted( char c, char q ) const
571 {
572 if ( length() < 2 || at( 0 ) != c || at( length() - 1 ) != c )
573 return false;
574 // skip past double escapes
575 uint i = length() - 2;
576 while ( i > 1 && d->str[i] == q && d->str[i-1] == q )
577 i = i - 2;
578 // empty string left?
579 if ( i == 0 )
580 return true;
581 // trailing quote escaped?
582 if ( d->str[i] == q )
583 return false;
584 return true;
585 }
586
587
588 /*! Returns the unquoted representation of the string if it isQuoted()
589 and the string itself else.
590
591 \a c at the start and end are removed; any occurence of \a c
592 within the string is left alone; an occurence of \a q followed by
593 \a c is converted into just \a c.
594 */
595
unquoted(char c,char q) const596 EString EString::unquoted( char c, char q ) const
597 {
598 if ( !isQuoted( c, q ) )
599 return *this;
600 EString r;
601 r.reserve( length() );
602 uint i = 1;
603 while ( i < length()-1 ) {
604 if ( d->str[i] == q )
605 i++;
606 r.append( d->str[i] );
607 i++;
608 }
609 return r;
610 }
611
612
613 /*! Returns a version of this string quited with \a c, and where any
614 occurences of \a c or \a q are escaped with \a q.
615 */
616
quoted(char c,char q) const617 EString EString::quoted( char c, char q ) const
618 {
619 EString r;
620 r.reserve( length()+2 );
621 r.append( c );
622 uint i = 0;
623 while ( i < length() ) {
624 if ( d->str[i] == c || d->str[i] == q )
625 r.append( q );
626 r.append( d->str[i] );
627 i++;
628 }
629 r.append( c );
630 return r;
631 }
632
633
634 /*! Returns true if this string is really boring, and false if it's
635 empty or contains at least one character that may warrant quoting
636 in some context. So far RFC 822 atoms, 2822 atoms, IMAP atoms and
637 MIME tokens are considered.
638
639 This function considers the intersection of those character
640 classes to be the Totally boring subset. If \a b is not its
641 default value, it may include other characters.
642 */
643
boring(Boring b) const644 bool EString::boring( Boring b ) const
645 {
646 if ( isEmpty() )
647 return false; // empty strings aren't boring - they may need quoting
648 uint i = 0;
649 bool exciting = false;
650 while ( i < length() && !exciting ) {
651 switch ( d->str[i] ) {
652 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
653 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
654 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
655 case 'V': case 'W': case 'X': case 'Y': case 'Z':
656
657 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
658 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
659 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
660 case 'v': case 'w': case 'x': case 'y': case 'z':
661
662 case '0': case '1': case '2': case '3': case '4':
663 case '5': case '6': case '7': case '8': case '9':
664
665 case '!':
666 case '#':
667 case '$':
668 case '&':
669 case '+':
670 case '-':
671 break;
672
673 case '.':
674 if ( b != MIME )
675 exciting = true;
676 break;
677
678 default:
679 exciting = true;
680 break;
681 }
682 i++;
683 }
684 if ( exciting ) // if we saw an exiting character...
685 return false;
686 return true;
687 }
688
689
690 /*! Returns a copy of this string where each run of whitespace is
691 compressed to a single ASCII 32, and where leading and trailing
692 whitespace is removed altogether.
693 */
694
simplified() const695 EString EString::simplified() const
696 {
697 // scan for the first nonwhitespace character
698 uint i = 0;
699 uint first = 0;
700 while ( i < length() && first == i ) {
701 char c = d->str[i];
702 if ( c == 9 || c == 10 || c == 13 || c == 32 )
703 first++;
704 i++;
705 }
706 // scan on to find the last nonwhitespace character and detect any
707 // sequences of two or more whitespace characters within the
708 // string.
709 uint last = first;
710 uint spaces = 0;
711 bool identity = true;
712 while ( identity && i < length() ) {
713 char c = d->str[i];
714 if ( c == 9 || c == 10 || c == 13 || c == 32 ) {
715 spaces++;
716 }
717 else {
718 if ( spaces > 1 )
719 identity = false;
720 spaces = 0;
721 last = i;
722 }
723 i++;
724 }
725 if ( identity )
726 return mid( first, last+1-first );
727
728 EString result;
729 result.reserve( length() );
730 i = 0;
731 spaces = 0;
732 while ( i < length() ) {
733 char c = d->str[i];
734 if ( c == 9 || c == 10 || c == 13 || c == 32 ) {
735 spaces++;
736 }
737 else {
738 if ( spaces && !result.isEmpty() )
739 result.append( ' ' );
740 spaces = 0;
741 result.append( c );
742 }
743 i++;
744 }
745 return result;
746 }
747
748
749 /*! Returns a copy of this string where leading and trailing
750 whitespace have been removed.
751 */
752
trimmed() const753 EString EString::trimmed() const
754 {
755 uint i = 0;
756 uint first = length();
757 uint last = 0;
758 while ( i < length() ) {
759 char c = d->str[i];
760 if ( c != 9 && c != 10 && c != 13 && c != 32 ) {
761 if ( i < first )
762 first = i;
763 if ( i > last )
764 last = i;
765 }
766 i++;
767 }
768
769 if ( last >= first )
770 return mid( first, last + 1 - first );
771
772 EString empty;
773 return empty;
774 }
775
776
777 /*! Returns a copy of this EString with at most one trailing LF or CRLF
778 removed. If there's more than one LF or CRLF, the remainder are
779 left.
780 */
781
stripCRLF() const782 EString EString::stripCRLF() const
783 {
784 uint n = 0;
785 if ( endsWith( "\r\n" ) )
786 n = 2;
787 else if ( endsWith( "\n" ) )
788 n = 1;
789
790 return mid( 0, length() - n );
791 }
792
793
794 /*! Returns the lowercase-hexadecimal representation of the string. */
795
hex() const796 EString EString::hex() const
797 {
798 EString s;
799 s.reserve( length()*2 );
800
801 uint i = 0;
802 while ( i < length() ) {
803 uint x = d->str[i];
804 s.appendNumber( x/16, 16 );
805 s.appendNumber( x&15, 16 );
806 i++;
807 }
808
809 return s;
810 }
811
/*! Returns a new string consisting of \a a followed by \a b. */

const EString operator+( const EString & a, const EString & b )
{
    EString sum;
    // reserve room for both parts at once
    sum.reserve( a.length() + b.length() );
    sum.append( a );
    sum.append( b );
    return sum;
}
820
821
822 /*! Returns true if this string starts with \a prefix, and false if it
823 does not.
824 */
825
startsWith(const EString & prefix) const826 bool EString::startsWith( const EString & prefix ) const
827 {
828 return length() >= prefix.length() &&
829 prefix == mid( 0, prefix.length() );
830 }
831
832
833 /*! Returns true if this string starts with \a prefix, and false if it
834 does not.
835 */
836
startsWith(const char * prefix) const837 bool EString::startsWith( const char * prefix ) const
838 {
839 if ( !prefix )
840 return true;
841 uint i = 0;
842 while ( prefix[i] && prefix[i] == at( i ) )
843 i++;
844 if ( prefix[i] )
845 return false;
846 return true;
847 }
848
849
850 /*! Returns true if this string ends with \a suffix, and false if it
851 does not.
852 */
853
endsWith(const EString & suffix) const854 bool EString::endsWith( const EString & suffix ) const
855 {
856 return length() >= suffix.length() &&
857 suffix == mid( length()-suffix.length() );
858 }
859
860
861 /*! Returns true if this string ends with \a suffix, and false if it
862 does not.
863 */
864
endsWith(const char * suffix) const865 bool EString::endsWith( const char * suffix ) const
866 {
867 if ( !suffix )
868 return true;
869 uint l = strlen( suffix );
870 if ( l > length() )
871 return false;
872 uint i = 0;
873 while ( i < l && suffix[i] == d->str[d->len - l + i] )
874 i++;
875 if ( i < l )
876 return false;
877 return true;
878 }
879
880
881 /*! Returns the number encoded by this string, and sets \a *ok to true
882 if that number is valid, or to false if the number is invalid. By
883 default the number is encoded in base 10, if \a base is specified
884 that base is used. \a base must be at least 2 and at most 36.
885
886 If the number is invalid (e.g. negative), number() returns 0.
887
888 If \a ok is a null pointer, it is not modified.
889 */
890
number(bool * ok,uint base) const891 uint EString::number( bool * ok, uint base ) const
892 {
893 uint i = 0;
894 uint n = 0;
895
896 bool good = !isEmpty();
897 while ( good && i < length() ) {
898 if ( d->str[i] < '0' || d->str[i] > 'z' )
899 good = false;
900
901 uint digit = d->str[i] - '0';
902
903 // hex or something?
904 if ( digit > 9 ) {
905 uint c = d->str[i];
906 if ( c > 'Z' )
907 c = c - 32;
908 digit = c - 'A' + 10;
909 }
910
911 // is the digit too large?
912 if ( digit >= base )
913 good = false;
914
915 // Would n overflow if we multiplied by 10 and added digit?
916 if ( n > UINT_MAX/base )
917 good = false;
918 n *= base;
919 if ( n >= (UINT_MAX - UINT_MAX % base) && digit > (UINT_MAX % base) )
920 good = false;
921 n += digit;
922
923 i++;
924 }
925
926 if ( !good )
927 n = 0;
928
929 if ( ok )
930 *ok = good;
931
932 return n;
933 }
934
935
936 /*! Returns a string representing the number \a n in the \a base
937 system, which is 10 (decimal) by default and must be in the range
938 2-36.
939
940 For 0, "0" is returned.
941
942 For bases 11-36, lower-case letters are used for the digits beyond
943 9.
944 */
945
fromNumber(int64 n,uint base)946 EString EString::fromNumber( int64 n, uint base )
947 {
948 EString r;
949 r.appendNumber( n, base );
950 return r;
951 }
952
953
954
955 /*! Converts \a n to a number in the \a base system and appends the
956 result to this string. If \a n is 0, "0" is appended.
957
958 Uses lower-case for digits above 9.
959 */
960
appendNumber(int64 n,uint base)961 void EString::appendNumber( int64 n, uint base )
962 {
963 int64 top = 1;
964 while ( top * base <= n )
965 top = base * top;
966 while ( top ) {
967 uint d = ( n / top ) % base;
968 char c = '0' + d;
969 if ( d > 9 )
970 c = 'a' + d - 10;
971 append( c );
972 top = top / base;
973 }
974 }
975
976
977 /*! Returns an \a e encoded version of this EString. If \a e is Base64,
978 then \a n specifies the maximum line length.
979 The default is 0, i.e. no limit.
980
981 This function does not support Uuencode. If \a e is Uuencode, it
982 returns the input string.
983 */
984
encoded(Encoding e,uint n) const985 EString EString::encoded( Encoding e, uint n ) const
986 {
987 if ( e == Base64 )
988 return e64( n );
989 else if ( e == QP )
990 return eQP( false, n > 0 );
991 return *this;
992 }
993
994
995 /*! Returns a \a e decoded version of this EString. */
996
decoded(Encoding e) const997 EString EString::decoded( Encoding e ) const
998 {
999 if ( e == Base64 )
1000 return de64();
1001 else if ( e == QP )
1002 return deQP();
1003 else if ( e == Uuencode )
1004 return deUue();
1005 return *this;
1006 }
1007
1008
1009 /*! Returns a version of this EString with absolutely nothing changed.
1010 (This function is eventually intended to percent-escape URIs, the
1011 opposite of deURI().)
1012 */
1013
eURI() const1014 EString EString::eURI() const
1015 {
1016 return *this;
1017 }
1018
1019
1020 /*! Returns a version of this EString with every %xx escape replaced with
1021 the corresponding character (as used to encode URIs). Invalid escape
1022 sequences are left unchanged, so this function cannot be used for
1023 input from potentially malevolent sources.
1024 */
1025
deURI() const1026 EString EString::deURI() const
1027 {
1028 uint l = length();
1029
1030 EString s;
1031 s.reserve( l );
1032
1033 uint p = 0;
1034 while ( p < l ) {
1035 char c = d->str[p];
1036 if ( c == '%' ) {
1037 bool ok;
1038 uint n = mid( p+1, 2 ).number( &ok, 16 );
1039 if ( ok && l > p + 2 ) {
1040 p += 2;
1041 c = (char)n;
1042 }
1043 }
1044 s.append( c );
1045 p++;
1046 }
1047
1048 return s;
1049 }
1050
1051
1052 /*! An implementation of uudecode, sufficient to handle some
1053 occurences of "content-transfer-encoding: x-uuencode"
1054 seen. Possibly not correct according to POSIX 1003.2b, who knows.
1055 */
1056
deUue() const1057 EString EString::deUue() const
1058 {
1059 if ( isEmpty() )
1060 return *this;
1061 uint i = 0;
1062 if ( !startsWith( "begin" ) ) {
1063 int begin = find( "\nbegin" );
1064 if ( begin < 0 )
1065 begin = find( "\rbegin" );
1066 if ( begin < 0 )
1067 return *this;
1068 i = (uint)begin+1;
1069 }
1070 EString r;
1071 while ( i < d->len ) {
1072 // step 0. skip over nonspace until CR/LF
1073 while ( i < d->len && d->str[i] != 13 && d->str[i] != 10 )
1074 i++;
1075 // step 1. skip over whitespace to the next length marker.
1076 while ( i < d->len &&
1077 ( d->str[i] == 9 || d->str[i] == 10 ||
1078 d->str[i] == 13 || d->str[i] == 32 ) )
1079 i++;
1080 // step 2. the length byte, or the end line.
1081 uint linelength = 0;
1082 if ( i < d->len ) {
1083 char c = d->str[i];
1084 if ( c == 'e' && i < d->len - 2 &&
1085 d->str[i+1] == 'n' && d->str[i+2] == 'd' &&
1086 ( i + 3 == d->len ||
1087 d->str[i+3] == 13 || d->str[i+3] == 10 ||
1088 d->str[i+3] == 9 || d->str[i+3] == 32 ) )
1089 return r;
1090 else if ( c < 32 )
1091 return *this;
1092 else
1093 linelength = (c - 32) & 63;
1094 i++;
1095 }
1096 // step 3. the line data. we assume it's in groups of 4 tokens.
1097 while ( linelength && i < d->len ) {
1098 char c0 = 0, c1 = 0, c2 = 0, c3 = 0;
1099 if ( i < d->len )
1100 c0 = 63 & ( d->str[i] - 32 );
1101 if ( i+1 < d->len )
1102 c1 = 63 & ( d->str[i+1] - 32 );
1103 if ( i+2 < d->len )
1104 c2 = 63 & ( d->str[i+2] - 32 );
1105 if ( i+3 < d->len )
1106 c3 = 63 & ( d->str[i+3] - 32 );
1107 i += 4;
1108 if ( linelength > 0 ) {
1109 r.append( ( (c0 << 2) | (c1 >> 4) ) & 255 );
1110 linelength--;
1111 }
1112 if ( linelength > 0 ) {
1113 r.append( ( (c1 << 4) | (c2 >> 2) ) & 255 );
1114 linelength--;
1115 }
1116 if ( linelength > 0 ) {
1117 r.append( ( (c2 << 6) | (c3 ) ) & 255 );
1118 linelength--;
1119 }
1120 }
1121 }
1122 // we ran off the end without seeing an end line. what to do?
1123 // return what we've seen so far?
1124 return r;
1125 }
1126
1127
1128
// Decoding table for de64(), indexed by ASCII code. Values 0-63 are
// base-64 digit values, 64 ends the input (NUL and '='), 65 marks
// bytes de64() treats as white space and 99 marks any other byte.
static char from64[128] =
{
    // 0-31: control characters
    64, 99, 99, 99, 99, 99, 99, 99, 65, 99, 65, 99, 99, 65, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,

    // 32-63: punctuation ('+' and '/'), the digits and '='
    99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 62, 99, 99, 99, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 99, 99, 99, 64, 99, 99,

    // 64-95: upper-case letters
    99,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 99, 99, 99, 99, 99,

    // 96-127: lower-case letters
    99, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 99, 99, 99, 99, 99
};
1154
1155
1156
1157 /*! Decodes this string using the base-64 algorithm and returns the result. */
1158
de64() const1159 EString EString::de64() const
1160 {
1161 // this code comes from mailchen, adapted for EString.
1162 EString result;
1163 result.reserve( length() * 3 / 4 + 20 ); // 20 = fudge
1164 EString body;
1165 uint bp = 0;
1166 uint decoded = 0;
1167 int m = 0;
1168 uint p = 0;
1169 bool done = false;
1170 while ( p < length() && !done ) {
1171 uint c = d->str[p++];
1172 if ( c <= 'z' )
1173 c = from64[c];
1174 if ( c < 64 ) {
1175 switch ( m ) {
1176 case 0:
1177 decoded = c << 2;
1178 break;
1179 case 1:
1180 decoded += ( (c & 0xf0) >> 4 );
1181 result.d->str[bp++] = decoded;
1182 decoded = (c & 15) << 4;
1183 break;
1184 case 2:
1185 decoded += ( (c & 0xfc) >> 2 );
1186 result.d->str[bp++] = decoded;
1187 decoded = (c & 3) << 6;
1188 break;
1189 case 3:
1190 decoded += c;
1191 result.d->str[bp++] = decoded;
1192 break;
1193 }
1194 m = (m+1)&3;
1195 }
1196 else if ( c == 64 ) {
1197 done = true;
1198 }
1199 else if ( c == 65 ) {
1200 // white space; perfectly normal and may be ignored.
1201 }
1202 else {
1203 // we're supposed to ignore all other characters. so
1204 // that's what we do, even though it may not be ideal in
1205 // all cases... consider that later.
1206 }
1207 }
1208 result.d->len = bp;
1209 return result;
1210 }
1211
1212
1213 static char to64[65] =
1214 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1215
1216 /*! Encodes this string using the base-64 algorithm and returns the
1217 result in lines of at most \a lineLength characters. If \a
1218 lineLength is not supplied, e64() returns a single line devoid of
1219 whitespace.
1220 */
1221
e64(uint lineLength) const1222 EString EString::e64( uint lineLength ) const
1223 {
1224 // this code comes from mailchen, adapted for EString
1225 int l = length();
1226 int i = 0;
1227 EString r;
1228 r.reserve( l*2 );
1229 int p = 0;
1230 uint c = 0;
1231 while ( i <= l-3 ) {
1232 r.d->str[p++] = to64[ ((d->str[i]>>2))&63 ];
1233 r.d->str[p++] = to64[ ((d->str[i]<<4)&48) + ((d->str[i+1]>>4)&15) ];
1234 r.d->str[p++] = to64[ ((d->str[i+1]<<2)&60) + ((d->str[i+2]>>6)&3) ];
1235 r.d->str[p++] = to64[ (d->str[i+2]&63) ];
1236 i += 3;
1237 c += 4;
1238 if ( lineLength > 0 && c >= lineLength ) {
1239 r.d->str[p++] = 13;
1240 r.d->str[p++] = 10;
1241 c = 0;
1242 }
1243 }
1244 if ( i < l ) {
1245 int i0, i1, i2;
1246 i0 = d->str[i];
1247 i1 = i+1 < l ? d->str[i+1] : 0;
1248 i2 = i+2 < l ? d->str[i+2] : 0;
1249 r.d->str[p++] = to64[ ((i0>>2))&63 ];
1250 r.d->str[p++] = to64[ ((i0<<4)&48) + ((i1>>4)&15) ];
1251 if ( i+1 < l )
1252 r.d->str[p++] = to64[ ((i1<<2)&60) + ((i2>>6)&3) ];
1253 else
1254 r.d->str[p++] = '=';
1255 if ( i+2 < l )
1256 r.d->str[p++] = to64[ (i2&63) ];
1257 else
1258 r.d->str[p++] = '=';
1259 }
1260 if ( lineLength > 0 && c > 0 ) {
1261 r.d->str[p++] = 13;
1262 r.d->str[p++] = 10;
1263 }
1264 r.d->len = p;
1265 return r;
1266 }
1267
1268
1269 /*! Decodes this string according to the quoted-printable algorithm,
1270 and returns the result. Errors are overlooked, to cope with all
1271 the mail-munging brokenware in the great big world.
1272
1273 If \a underscore is true, underscores in the input are translated
1274 into spaces (as specified in RFC 2047).
1275 */
1276
deQP(bool underscore) const1277 EString EString::deQP( bool underscore ) const
1278 {
1279 uint i = 0;
1280 EString r;
1281 r.reserve( length() );
1282 while ( i < length() ) {
1283 if ( d->str[i] != '=' ) {
1284 char c = d->str[i++];
1285 if ( underscore && c == '_' )
1286 c = ' ';
1287 r.d->str[r.d->len++] = c;
1288 }
1289 else {
1290 // are we looking at = followed by end-of-line?
1291 bool ok = false;
1292 uint c = 0;
1293 bool eol = false;
1294 uint j = i+1;
1295 // skip possibly appended whitespace first
1296 while ( j < length() &&
1297 ( d->str[j] == ' ' || d->str[j] == '\t' ) )
1298 j++;
1299 // there are two types of soft EOLs:
1300 if ( j < d->len && d->str[j] == 10 ) {
1301 eol = true;
1302 j++;
1303 }
1304 else if ( j < d->len-1 && d->str[j] == 13 && d->str[j+1] == 10 ) {
1305 eol = true;
1306 j = j + 2;
1307 }
1308 else if ( i + 2 < d->len ) {
1309 // ... and one common case: a two-digit hex number, not EOL
1310 c = mid( i+1, 2 ).number( &ok, 16 );
1311 }
1312
1313 // write the proper decoded string and increase i.
1314 if ( eol ) { // ... if it's a soft EOL
1315 i = j;
1316 }
1317 else if ( ok ) { // ... or if it's a two-digit hex number
1318 r.d->str[r.d->len++] = c;
1319 i = i + 3;
1320 }
1321 else { // ... or if it's an error... we overlook it
1322 r.d->str[r.d->len++] = d->str[i++];
1323 }
1324 }
1325 }
1326 return r;
1327 }
1328
1329
// Upper-case hexadecimal digits indexed by nibble value, used by
// eQP() to write =XX escapes. The 17th element is the terminating
// null of the string literal.
static char qphexdigits[17] = "0123456789" "ABCDEF";
1331
1332
maybeBoundary(const EString & s,uint i)1333 static bool maybeBoundary( const EString & s, uint i ) {
1334 if ( s.length() < i + 2 )
1335 return false;
1336 if ( s[i] != '-' || s[i+1] != '-' )
1337 return false;
1338
1339 while ( i < s.length() && s[i] >= ' ' ) {
1340 //bchars := bcharsnospace / " "
1341 //bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
1342 // "+" / "_" / "," / "-" / "." /
1343 // "/" / ":" / "=" / "?"
1344 switch( s[i] ) {
1345 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1346 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1347 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1348 case 'v': case 'w': case 'x': case 'y': case 'z':
1349 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1350 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1351 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1352 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1353 case '0': case '1': case '2': case '3': case '4': case '5': case '6':
1354 case '7': case '8': case '9':
1355 case '\'':
1356 case '(': case ')': case '+': case '_': case ',': case '-': case '.':
1357 case '/': case ':': case '=': case '?':
1358 case ' ':
1359 // ok
1360 break;
1361 default:
1362 return false;
1363 }
1364 ++i;
1365 }
1366 return true;
1367 }
1368
1369
1370 /*! Encodes this string using the quoted-printable algorithm and
1371 returns the encoded version. In the encoded version, all line
1372 feeds are CRLF, and soft line feeds are positioned so that the q-p
1373 looks as good as it can.
1374
1375 Note that this function is slightly incompatible with RFC 2646: It
1376 encodes trailing spaces, as suggested in RFC 2045, but RFC 2646
1377 suggest that if trailing spaces are the only reason to q-p, then
1378 the message should not be encoded.
1379
1380 If \a underscore is present and true, this function uses the variant
1381 of q-p specified by RFC 2047, where a space is encoded as an
1382 underscore and a few more characters need to be encoded.
1383
1384 If \a from is present and true, this function also makes sure that
1385 no output line starts with "From " or looks like a MIME boundary.
1386 */
1387
eQP(bool underscore,bool from) const1388 EString EString::eQP( bool underscore, bool from ) const
1389 {
1390 if ( isEmpty() )
1391 return *this;
1392 uint i = 0;
1393 EString r;
1394 // no input character can use more than six output characters (=
1395 // CR LF = 3 D), so we allocate as much space as we could possibly
1396 // need.
1397 r.reserve( length()*6 );
1398 uint c = 0;
1399 while ( i < d->len ) {
1400 if ( d->str[i] == 10 ||
1401 ( i < d->len-1 && d->str[i] == 13 && d->str[i+1] == 10 ) ) {
1402 // we have a line feed. if the last character on the line
1403 // was a space, we need to quote that to protect it.
1404 if ( r.d->len > 0 && r.d->str[r.d->len-1] == ' ' ) {
1405 r.d->str[r.d->len-1] = '=';
1406 r.d->str[r.d->len++] = '2';
1407 r.d->str[r.d->len++] = '0';
1408 }
1409 c = 0;
1410 if ( d->str[i] == 13 )
1411 r.d->str[r.d->len++] = d->str[i++];
1412 r.d->str[r.d->len++] = 10;
1413 // worst case: five bytes
1414 }
1415 else {
1416 if ( c > 72 ) {
1417 uint j = 1;
1418 while ( j < 10 && r.d->str[r.d->len-j] != ' ' )
1419 j++;
1420 if ( j >= 10 )
1421 j = 0;
1422 else
1423 j--;
1424 uint k = 1;
1425 while ( k <= j ) {
1426 r.d->str[r.d->len - k + 3] = r.d->str[r.d->len - k];
1427 k++;
1428 }
1429 // always CRLF for soft linefeed
1430 r.d->str[r.d->len++ - j] = '=';
1431 r.d->str[r.d->len++ - j] = 13;
1432 r.d->str[r.d->len++ - j] = 10;
1433 c = j;
1434 }
1435
1436 if ( underscore && d->str[i] == ' ' ) {
1437 r.d->str[r.d->len++] = '_';
1438 c += 1;
1439 }
1440 else if ( underscore &&
1441 ! ( ( d->str[i] >= '0' && d->str[i] <= '9' ) ||
1442 ( d->str[i] >= 'a' && d->str[i] <= 'z' ) ||
1443 ( d->str[i] >= 'A' && d->str[i] <= 'Z' ) ) ) {
1444 r.d->str[r.d->len++] = '=';
1445 r.d->str[r.d->len++] = qphexdigits[d->str[i]/16];
1446 r.d->str[r.d->len++] = qphexdigits[d->str[i]%16];
1447 c += 3;
1448 }
1449 else if ( from && c == 0 && maybeBoundary( *this, i ) ) {
1450 r.d->str[r.d->len++] = '=';
1451 r.d->str[r.d->len++] = qphexdigits[d->str[i]/16];
1452 r.d->str[r.d->len++] = qphexdigits[d->str[i]%16];
1453 c += 3;
1454 }
1455 else if ( from && c == 0 && d->len >= i + 4 &&
1456 d->str[i] == 'F' && d->str[i+1] == 'r' &&
1457 d->str[i+2] == 'o' && d->str[i+3] == 'm' &&
1458 d->str[i+4] == ' ' ) {
1459 r.d->str[r.d->len++] = '=';
1460 r.d->str[r.d->len++] = qphexdigits[d->str[i]/16];
1461 r.d->str[r.d->len++] = qphexdigits[d->str[i]%16];
1462 c += 3;
1463 }
1464 else if ( ( d->str[i] >= ' ' && d->str[i] < 127 &&
1465 d->str[i] != '=' ) ||
1466 ( d->str[i] == '\t' ) ) {
1467 r.d->str[r.d->len++] = d->str[i];
1468 c++;
1469 }
1470 else {
1471 r.d->str[r.d->len++] = '=';
1472 r.d->str[r.d->len++] = qphexdigits[d->str[i]/16];
1473 r.d->str[r.d->len++] = qphexdigits[d->str[i]%16];
1474 c += 3;
1475 }
1476 }
1477 i++;
1478 }
1479 return r;
1480 }
1481
1482
1483 /*! This function returns true if the string would need to be encoded
1484 using quoted-printable. It is a greatly simplified copy of eQP(),
1485 with the changes made necessary by RFC 2646.
1486 */
1487
needsQP() const1488 bool EString::needsQP() const
1489 {
1490 uint i = 0;
1491 uint c = 0;
1492 while ( i < length() ) {
1493 if ( c == 0 && maybeBoundary( *this, i ) )
1494 return true;
1495 if ( c == 0 && d->str[i] == 'F' && d->str[i+1] == 'r' )
1496 return true;
1497 if ( d->str[i] == 10 )
1498 c = 0;
1499 else if ( c > 78 )
1500 return true;
1501 else if ( ( d->str[i] >= ' ' && d->str[i] < 127 ) ||
1502 ( d->str[i] == '\t' ) ||
1503 ( d->str[i] == 13 && d->str[i+1] == 10 ) )
1504 c++;
1505 else
1506 return true;
1507 i++;
1508 }
1509 return false;
1510
1511 }
1512
1513
1514 /*! Returns -1 if this string is lexicographically before \a other, 0
1515 if they are the same, and 1 if this string is lexicographically
1516 after \a other.
1517
1518 The comparison is case sensitive - just a byte comparison.
1519 */
1520
compare(const EString & other) const1521 int EString::compare( const EString & other ) const
1522 {
1523 if ( d == other.d )
1524 return 0;
1525 uint i = 0;
1526 while ( i < length() && i < other.length() &&
1527 d->str[i] == other.d->str[i] )
1528 i++;
1529 if ( i >= length() && i >= other.length() )
1530 return 0;
1531 if ( i >= length() )
1532 return -1;
1533 if ( i >= other.length() )
1534 return 1;
1535 if ( d->str[i] < other.d->str[i] )
1536 return -1;
1537 return 1;
1538 }
1539
1540
operator <(const EString & other) const1541 bool EString::operator<( const EString & other ) const
1542 {
1543 return compare( other ) < 0;
1544 }
1545
1546
operator >(const EString & other) const1547 bool EString::operator>( const EString & other ) const
1548 {
1549 return compare( other ) > 0;
1550 }
1551
1552
operator <=(const EString & other) const1553 bool EString::operator<=( const EString & other ) const
1554 {
1555 return compare( other ) <= 0;
1556 }
1557
1558
operator >=(const EString & other) const1559 bool EString::operator>=( const EString & other ) const
1560 {
1561 return compare( other ) >= 0;
1562 }
1563
1564
operator <(const char * other) const1565 bool EString::operator<( const char * other ) const
1566 {
1567 if ( !other )
1568 return false;
1569
1570 uint l = length();
1571 uint i = 0;
1572 while ( i < l && d->str[i] == other[i] && other[i] )
1573 i++;
1574 // four cases: we ran out of characters, other did, both did,
1575 // there's a difference
1576 if ( i == l && !other[i] )
1577 return false;
1578 if ( !other[i] )
1579 return false;
1580 if ( i == l )
1581 return true;
1582 return d->str[i] < other[i];
1583 }
1584
1585
1586 /*! This function is a debugging aid. It prints the contents of the
1587 string within single quotes followed by a trailing newline to
1588 stderr.
1589 */
1590
print() const1591 void EString::print() const
1592 {
1593 uint i = 0;
1594
1595 fprintf( stderr, "'" );
1596 while ( i < length() )
1597 fprintf( stderr, "%c", d->str[i++] );
1598 fprintf( stderr, "'\n" );
1599 }
1600
1601
1602 /*! Returns \a n as a string representing that number in a
1603 human-readable fashion optionally suffixed by K, M, G or T.
1604
1605 The number is rounded more or less correctly.
1606 */
1607
humanNumber(int64 n)1608 EString EString::humanNumber( int64 n )
1609 {
1610 if ( n < 1024 )
1611 return fromNumber( n );
1612
1613 int64 f = 1024;
1614 char s = 'K';
1615 if ( n < 1024 * 1024 ) {
1616 // ok
1617 }
1618 else if ( n < 1024 * 1024 * 1024 ) {
1619 f = 1024 * 1024;
1620 s = 'M';
1621 }
1622 else if ( n < 1024LL * 1024 * 1024 * 1024 ) {
1623 f = 1024 * 1024 * 1024;
1624 s = 'G';
1625 }
1626 else {
1627 // terabytes. we don't use petabytes or exabytes since people
1628 // don't know their abbreviations by heart.
1629 f = 1024LL * 1024 * 1024 * 1024;
1630 s = 'T';
1631 }
1632
1633 EString r;
1634 // if it's single-digit, we add a decimal point. since we only go
1635 // to TB, not petabyte or exabyte, we don't need to check for
1636 // INT64_MAX/10. (actually we'd only need that check for exabytes.)
1637 if ( n < f * 10 ) {
1638 n += f/20-1;
1639 r = fromNumber( n/f );
1640 uint m = (n%f)/(f/10);
1641 r.append( '.' );
1642 r.append( '0' + m );
1643 }
1644 else {
1645 n += f/2-1;
1646 r = fromNumber( n/f );
1647 }
1648 r.append( s );
1649 return r;
1650 }
1651
1652
// all the keywords we know about, found by grepping through
// message/*.cpp. EString::anonymised() leaves any word found in this
// list as it is. the list is terminated by a null pointer; a few
// words occur twice, which does no harm.
static const char * keywords[] = {
    "7bit", "8bit", "alternative", "aug", "binary", "bcc", "cc",
    "comments", "content-description", "content-disposition",
    "content-id", "content-language", "content-location",
    "content-md5", "content-transfer-encoding", "content-type",
    "date", "fri", "from", "in-reply-to", "jun", "jul", "keywords",
    "may", "message-id", "mime-version", "mon", "orig-date",
    "received", "references", "reply-to", "resent-bcc",
    "resent-cc", "resent-date", "resent-from",
    "resent-message-id", "resent-sender", "resent-to",
    "return-path", "sender", "sep", "subject", "to", "us-ascii",

    "adt", "akdt", "akst", "apr", "ast", "attachment", "base64",
    "body", "boundary", "brt", "bst", "bytes", "cadt", "cast", "cct",
    "cdt", "ces", "cest", "cet", "charset", "cst", "cut", "data",
    "dec", "deleted", "digest", "eadt", "east", "edt", "eet", "est",
    "feb", "flag", "fri", "gmt", "grnlnddt", "grnlndst", "hadt",
    "hast", "helo", "hkt", "hst", "html", "id", "idate", "inline",
    "jan", "jst", "kdt", "kst", "lhlo", "lines", "lockuidnext",
    "mar", "mdt", "message", "mest", "mesz", "met", "metdst", "mez",
    "mezt", "mon", "msd", "msk", "mst", "multipart", "name", "ndt",
    "nov", "nst", "nzdt", "nzst", "oct", "part", "plain", "pdt",
    "pst", "quit", "quoted-printable", "rawbytes", "rfc822",
    "rfc822size", "root", "sast", "sat", "seen", "sep", "supplied",
    "text", "tue", "uid", "us-ascii", "ut", "utc", "value", "wadt",
    "wast", "wed", "wet", "ydt", "yst", "mixed",
    0
};
1680
// helper for EString::anonymised(): returns true if c is an ASCII
// letter or digit, or one of = " : ? - ( ) _ and false for anything
// else.
static inline bool isMungableChar( char c ) {
    if ( c >= 'a' && c <= 'z' )
        return true;
    if ( c >= 'A' && c <= 'Z' )
        return true;
    if ( c >= '0' && c <= '9' )
        return true;

    switch ( c ) {
    case '=': case '"': case ':': case '?':
    case '-': case '(': case ')': case '_':
        return true;
    default:
        return false;
    }
}
1697
1698
1699 /*! Returns a copy of this string where most/all content has been
1700 replaced with the letter 'x' or the digit '4', but if the message
1701 was an RFC 822 message, it keeps the same parse tree.
1702
1703 Specifically, most ASCII words are changed to xxxx, while most/all
1704 syntax elements are kept.
1705
1706 This function is very, very slow. That's okay since it's only used
1707 for sending bug reports to us, and we all know, that's not a common
1708 case.
1709 */
1710
anonymised() const1711 EString EString::anonymised() const
1712 {
1713 uint b = 0;
1714 EString r;
1715 while ( b < length() ) {
1716 uint e = b;
1717 while ( e < d->len && ( d->str[e] > 127 ||
1718 isMungableChar( d->str[e] ) ) )
1719 e++;
1720 // we have a word.
1721 bool munge = true;
1722 if ( e == b )
1723 munge = false;
1724
1725 if ( munge && d->str[e-1] == ':' ) // header field names
1726 munge = false;
1727
1728 if ( munge ) { // mime parameters
1729 uint i = b;
1730 while ( i < e && d->str[i] != '"' && d->str[i] != '=' )
1731 i++;
1732 if ( i < e )
1733 munge = false;
1734 }
1735
1736 if ( munge && // boundary lines
1737 b + 2 <= e &&
1738 d->str[b] == '-' && d->str[b+1] == '-' ) {
1739 munge = false;
1740 }
1741
1742 if ( munge ) { // any keyword
1743 EString m = mid( b, e-b ).lower();
1744 uint i = 0;
1745 while ( keywords[i] && m != keywords[i] )
1746 i++;
1747 if ( keywords[i] )
1748 munge = false;
1749 }
1750
1751 if ( munge ) { // any word containing non-ascii
1752 uint i = b;
1753 while ( i < e && d->str[i] < 128 )
1754 i++;
1755 if ( i < e )
1756 munge = false;
1757 }
1758
1759 if ( munge ) {
1760 uint i = 0;
1761 while ( b + i < e ) {
1762 char c = d->str[b+i];
1763 if ( c >= 'a' && c <= 'z' )
1764 r.append( 'a' + (i%26) );
1765 else if ( c >= 'A' && c <= 'Z' )
1766 r.append( 'a' + (i%26) );
1767 else
1768 r.append( c );
1769 i++;
1770 }
1771 }
1772 else {
1773 r.append( mid( b, e-b ) );
1774 }
1775 b = e;
1776
1777 while ( b < d->len && !isMungableChar( d->str[b] ) ) {
1778 r.append( d->str[b] );
1779 b++;
1780 }
1781 }
1782
1783 return r;
1784 }
1785
1786
1787
1788 /*! Returns a copy of this string where every linefeed is CRLF, and
1789 where the last two characters are CRLF.
1790 */
1791
crlf() const1792 EString EString::crlf() const
1793 {
1794 bool copy = true;
1795 if ( length() < 2 ||
1796 d->str[d->len-1] != 10 ||
1797 d->str[d->len-2] != 13 )
1798 copy = false;
1799 uint i = 0;
1800 while ( copy && i < d->len ) {
1801 if ( d->str[i] == 13 && i < d->len && d->str[i+1] == 10 )
1802 i += 2;
1803 else if ( d->str[i] == 13 || d->str[i] == 10 )
1804 copy = false;
1805 else
1806 i++;
1807 }
1808 if ( copy )
1809 return *this;
1810
1811 EString r;
1812 r.reserve( length() );
1813 r.append( mid( 0, i ) );
1814 bool lf = false;
1815 uint len = 0;
1816 if ( d )
1817 len = d->len;
1818 while ( i < len ) {
1819 lf = false;
1820 char c = d->str[i++];
1821
1822 if ( c == 10 ) {
1823 lf = true;
1824 }
1825 else if ( c == 13 ) {
1826 lf = true;
1827 if ( i < d->len && d->str[i] == 10 )
1828 i++;
1829 else if ( i < d->len-1 &&
1830 d->str[i] == 13 && d->str[i+1] == 10 )
1831 i += 2;
1832 }
1833
1834 if ( lf )
1835 r.append( "\r\n" );
1836 else
1837 r.append( c );
1838 }
1839 if ( !lf )
1840 r.append( "\r\n" );
1841
1842 return r;
1843 }
1844
1845
1846 /*! Returns true if this string contains at least one instance of \a s. */
1847
contains(const EString & s) const1848 bool EString::contains( const EString & s ) const
1849 {
1850 if ( find( s ) >= 0 )
1851 return true;
1852 return false;
1853 }
1854
1855
1856 /*! Returns true if this string contains at least one instance of \a c. */
1857
contains(const char c) const1858 bool EString::contains( const char c ) const
1859 {
1860 if ( find( c ) >= 0 )
1861 return true;
1862 return false;
1863 }
1864
1865
1866 /*! Returns true if this string contains at least one instance of \a
1867 s, and the characters before and after the occurence aren't
1868 letters.
1869 */
1870
containsWord(const EString & s) const1871 bool EString::containsWord( const EString & s ) const
1872 {
1873 int i = find( s );
1874 while ( i >= 0 ) {
1875 bool before = false;
1876 bool after = false;
1877 if ( i == 0 ) {
1878 before = true;
1879 }
1880 else {
1881 char c = d->str[i-1];
1882 if ( c < 'A' || ( c > 'Z' && c < 'a' ) || c > 'z' )
1883 before = true;
1884 }
1885 if ( i + s.length() == length() ) {
1886 after = true;
1887 }
1888 else {
1889 char c = d->str[i+s.length()];
1890 if ( c < 'A' || ( c > 'Z' && c < 'a' ) || c > 'z' )
1891 after = true;
1892 }
1893 if ( before && after )
1894 return true;
1895 i = find( s, i+1 );
1896 }
1897 return false;
1898 }
1899
1900
1901 /*! Returns a copy of this string wrapped so that each line contains
1902 at most \a linelength characters. The first line is prefixed by \a
1903 firstPrefix, subsequent lines by \a otherPrefix. If \a spaceAtEOL
1904 is true, all lines except the last end with a space.
1905
1906 The prefixes are counted towards line length, but the optional
1907 trailing space is not.
1908
1909 Only space (ASCII 32) is a line-break opportunity. If there are
1910 multiple spaces where a line is broken, all the spaces are
1911 replaced by a single CRLF. Linefeeds added use CRLF.
1912 */
1913
wrapped(uint linelength,const EString & firstPrefix,const EString & otherPrefix,bool spaceAtEOL) const1914 EString EString::wrapped( uint linelength,
1915 const EString & firstPrefix, const EString & otherPrefix,
1916 bool spaceAtEOL ) const
1917 {
1918 // result must be modifiable() at all times, otherwise we allocate
1919 // all the RAM.
1920
1921 // working:
1922 EString result;
1923 result.reserve( length() );
1924 result.append( firstPrefix );
1925 // broken but should work. needs investigation.
1926 // EString result = firstPrefix;
1927 // result.reserve( length() );
1928
1929 // move is where we keep the text that has to be moved to the next
1930 // line. it too should be modifiable() all the time.
1931 EString move;
1932 uint i = 0;
1933 uint linestart = 0;
1934 uint space = 0;
1935 while ( i < length() ) {
1936 char c = at( i );
1937 if ( c == ' ' )
1938 space = result.length();
1939 else if ( c == '\n' )
1940 linestart = result.length() + 1;
1941 result.append( c );
1942 i++;
1943 // add a soft linebreak?
1944 if ( result.length() > linestart + linelength && space > linestart ) {
1945 while ( space > 0 && result[space-1] == ' ' )
1946 space--;
1947 linestart = space + 1;
1948 while ( result[linestart] == ' ' )
1949 linestart++;
1950 move.truncate();
1951 if ( result.length() > linestart )
1952 move.append( result.cstr() + linestart );
1953 if ( spaceAtEOL )
1954 result.truncate( space + 1 );
1955 else
1956 result.truncate( space );
1957 result.append( "\r\n" );
1958 result.append( otherPrefix );
1959 result.append( move );
1960 }
1961 }
1962 return result;
1963 }
1964
1965
1966 /*! Replaces all occurences of \a a in this string with \a b. Rather
1967 slow and allocates much memory. Could be optimised if it ever
1968 shows up on the performance graphs.
1969
1970 \a a must not be empty.
1971
1972 Replaced sections are not considered when looking for the next
1973 match.
1974 */
1975
replace(const EString & a,const EString & b)1976 void EString::replace( const EString & a, const EString & b )
1977 {
1978 if ( a == b)
1979 return; // noop
1980 if ( a.isEmpty() )
1981 return; // infinite loop
1982
1983 int i = find( a );
1984 while ( i >= 0 ) {
1985 EString r = mid( i+a.length() );
1986 truncate( i );
1987 append( b );
1988 append( r );
1989 i = find( a, i + b.length() );
1990 }
1991 }
1992