1 // @(#) $Id: decoder.cc,v 1.7 2003/08/28 18:45:38 balu Exp $
2
3 #include "decoder.h"
4 #include <cstring>
5
// Reverse base64 lookup, indexed by the input byte value.
// An entry of 0xff marks a byte that is not part of the base64 alphabet.
// 'A'-'Z' rank 0-25, 'a'-'z' rank 26-51, '0'-'9' rank 52-61, '+' ranks 62
// and '/' ranks 63.  The padding character '=' ranks 0, so it is accepted
// as a symbol that contributes six zero bits.
unsigned char base64_rank[256] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x00
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x10
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,   62, 0xff, 0xff, 0xff,   63, // 0x20: '+' '/'
      52,   53,   54,   55,   56,   57,   58,   59,   60,   61, 0xff, 0xff, 0xff,    0, 0xff, 0xff, // 0x30: '0'-'9' '='
    0xff,    0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,   11,   12,   13,   14, // 0x40: 'A'-'O'
      15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x50: 'P'-'Z'
    0xff,   26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   40, // 0x60: 'a'-'o'
      41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x70: 'p'-'z'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x80
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x90
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xa0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xb0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xc0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xd0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0xe0
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff  // 0xf0
};
// Forward base64 alphabet: the symbol for each 6-bit value 0..63, followed
// by the terminating NUL of the literal (hence 65 elements).
char base64_code[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
26
// Converts one hexadecimal digit character to its numeric value (0..15).
//
// Digits '0'-'9' and letters in either case ('A'-'F', 'a'-'f') are
// understood.  The argument is not validated: any other character goes
// through the same arithmetic as before and yields a meaningless number,
// so callers that need strictness must check the input themselves.
int hexchar(char c)
{
    // Lower-case digits are common in quoted-printable written by lenient
    // encoders; without this branch 'a'..'f' would come out as 42..47.
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;

    return c <= '9' ? c - '0' : c - 'A' + 10;
}
31
decodeQuotedPrintable(const string & s)32 string decodeQuotedPrintable(const string &s)
33 {
34 string c;
35
36 for(string::const_iterator i=s.begin();i!=s.end();i++)
37 if(*i=='_')
38 c+=' ';
39 else
40 if(*i!='=')
41 c+=*i;
42 else
43 if(i+1!=s.end() && i+2!=s.end())
44 {
45 c+=hexchar(*(i+1))*16+hexchar(*(i+2));
46 i+=2;
47 }
48 else
49 break;
50 return c;
51 }
52
53 // Based on libgmime2 algorithm
decodeBase64(const string & s)54 string decodeBase64(const string &s)
55 {
56 string c;
57 unsigned int saved=0;
58 short int state=0;
59 string::const_iterator i;
60
61 // convert 4 base64 bytes to 3 normal bytes
62 for(i=s.begin();i!=s.end();i++)
63 {
64 unsigned int n=base64_rank[(unsigned char)*i];
65 if(n!=0xff)
66 {
67 saved=(saved<<6) | n;
68 state++;
69 if(state==4)
70 {
71 c+=saved >> 16;
72 c+=saved >> 8;
73 c+=saved;
74 state=0;
75 }
76 }
77 }
78
79 // quick scan back for '=' on the end somewhere
80 // fortunately we can drop 1 output char for each trailing = (upto 2)
81 state=2;
82 int len=c.length();
83 i=s.end();
84 while(i!=s.begin() && state)
85 {
86 i--;
87 if(base64_rank[(unsigned char)*i]!=0xff)
88 {
89 if(*i=='=' && len>0)
90 len--;
91 state--;
92 }
93 }
94 c.resize(len);
95 return c;
96 }
97
encodeBase64(const string & s)98 string encodeBase64(const string &s)
99 {
100 string c;
101 unsigned int saved=0;
102 short int state=0;
103
104 for(string::const_iterator i=s.begin();i!=s.end();i++)
105 {
106 saved=(saved<<8) | (unsigned char)*i;
107 state+=8;
108 while(state>=6)
109 {
110 state-=6;
111 c+=base64_code[(saved>>state) & 0x3f];
112 }
113 }
114 switch(state)
115 {
116 case 2:
117 c+=base64_code[(saved & 0x3)<<4];
118 c+="==";
119 break;
120 case 4:
121 c+=base64_code[(saved & 0xf)<<2];
122 c+="=";
123 break;
124 }
125 return c;
126 }
127
decode(const string & s,int enc)128 string decode(const string &s,int enc)
129 {
130 switch(enc)
131 {
132 case TE_QUOTED_PRINTABLE:
133 return decodeQuotedPrintable(s);
134 case TE_BASE64:
135 return decodeBase64(s);
136 default:
137 return s;
138 }
139 }
140
encode(const string & s)141 string encode(const string &s)
142 {
143 return encodeBase64(s);
144 }
145
decodeWord(const string & s)146 string decodeWord(const string &s)
147 {
148 int n=s.find('?',2);
149 string c=s.substr(n+3,s.length()-n-5);
150 switch(s[n+1])
151 {
152 case 'q':
153 case 'Q':
154 return decodeQuotedPrintable(c);
155 break;
156 case 'b':
157 case 'B':
158 return decodeBase64(c);
159 break;
160 }
161 return "";
162 }
163
// Tells whether `c` may stand next to an encoded word as a delimiter:
// white space (blank, tab, CR, LF) or one of the special characters
// ( ) < > @ , ; : ' " / [ ] ? . =
//
// CR and LF are part of the set so that an encoded word sitting directly
// before or after a line break of a folded header is still recognised;
// decodeHeader() already treats both characters as white space.
//
// Note: strchr() also matches the terminating NUL of the set, so '\0' is
// reported as a delimiter as well.
bool isRFCSpace(char c)
{
    static const char spaces[] = " \t\r\n()<>@,;:'\"/[]?.=";

    return strchr(spaces, c) != NULL;
}
170
getRFCWord(const string & s,string::const_iterator p)171 string getRFCWord(const string &s,string::const_iterator p)
172 {
173 string::const_iterator i=p+1;
174
175 for(;i!=s.end();i++)
176 switch(*i)
177 {
178 case '=':
179 if((i+1==s.end() || isRFCSpace(*(i+1))) && *(i-1)=='?')
180 return string(p,i+1);
181 break;
182 case ' ':
183 case '\t':
184 case '\n':
185 case '\r':
186 return string();
187 }
188 return string();
189 }
190
decodeHeader(const string & s)191 string decodeHeader(const string &s)
192 {
193 string c,space;
194 bool midspace=false;
195
196 for(string::const_iterator i=s.begin();i!=s.end();i++)
197 switch(*i)
198 {
199 case ' ':
200 case '\t':
201 case '\n':
202 case '\r':
203 if(midspace)
204 space+=*i;
205 else
206 c+=*i;
207 break;
208 case '=':
209 if((i==s.begin() || isRFCSpace(*(i-1))) &&
210 i+1!=s.end() && *(i+1)=='?')
211 {
212 string ss=getRFCWord(s,i);
213 if(ss.length()!=0)
214 {
215 c+=decodeWord(ss);
216 i+=ss.length()-1;
217 midspace=true;
218 space=string();
219 break;
220 }
221 }
222 default:
223 if(midspace)
224 {
225 midspace=false;
226 c+=space;
227 space=string();
228 }
229 c+=*i;
230 }
231 return c;
232 }
233