1 /* HTGuess.c
2 ** STREAM TO GUESS CONTENT-TYPE
3 **
4 ** (c) COPYRIGHT MIT 1995.
5 ** Please first read the full copyright statement in the file COPYRIGH.
6 ** @(#) $Id$
7 **
**	This stream buffers up to SAMPLE_SIZE bytes of its input, guesses the
**	content type (and, where it can, an encoding) from that sample, and then
**	builds a stream stack for the guessed type and passes all data on to it.
11 **
12 ** HISTORY:
13 ** 8 Jul 94 FM Insulate free() from _free structure element.
14 **
15 */
16
17 /* Library include files */
18 #include "wwwsys.h"
19 #include "WWWUtil.h"
20 #include "WWWLib.h"
21 #include "WWWFile.h"
22 #include "HTGuess.h"
23
24 #define SAMPLE_SIZE 200 /* Number of chars to look at */
25
26 /* Stream Object
27 ** ------------
28 */
29
30 struct _HTStream {
31 const HTStreamClass * isa;
32
33 HTRequest * request;
34 HTResponse * response;
35 HTFormat output_format;
36 HTStream * output_stream;
37 HTStream * target;
38
39 BOOL transparent;
40 int cnt;
41 int text_cnt;
42 int lf_cnt;
43 int cr_cnt;
44 int pg_cnt;
45 int ctrl_cnt;
46 int high_cnt;
47 char * write_ptr;
48 char buffer[ SAMPLE_SIZE + 1 ];
49 };
50
/*
**  Shorthands for calling a method of the target stream.  Each one expands
**  to an expression that refers to a variable named `me', so they can only
**  be used where an `HTStream * me' whose `target' has been set is in scope.
*/
#define PUT_CHAR(ch) \
	((*me->target->isa->put_character)(me->target, (ch)))
#define PUT_STRING(str) \
	((*me->target->isa->put_string)(me->target, (str)))
#define PUT_BLOCK(blk, len) \
	((*me->target->isa->put_block)(me->target, (blk), (len)))
54
55 /* ------------------------------------------------------------------------- */
56
is_html(char * buf)57 PRIVATE BOOL is_html (char * buf)
58 {
59 char * p = strchr(buf,'<');
60
61 if (p && (!strncasecomp(p, "<HTML>", 6) ||
62 !strncasecomp(p, "<!DOCTYPE HTML", 13) ||
63 !strncasecomp(p, "<HEAD", 5) ||
64 !strncasecomp(p, "<TITLE>", 7) ||
65 !strncasecomp(p, "<BODY>", 6) ||
66 !strncasecomp(p, "<PLAINTEXT>", 11) ||
67 (p[0]=='<' && TOUPPER(p[1]) == 'H' && p[3]=='>')))
68 return YES;
69 else
70 return NO;
71 }
72
HTGuess_flush(HTStream * me)73 PRIVATE int HTGuess_flush (HTStream * me)
74 {
75 if (!me->transparent) {
76 HTResponse * response = me->response;
77
78 /*
79 ** First we look for magic tokens and evaluate the contents of the buffer
80 ** that we are investigating.
81 */
82 if (me->cnt) {
83 HTTRACE(STREAM_TRACE, "GUESSING.... Result of content analysis: Text=%d%% Newlines=%d%% Ctrl=%d%% High=%d%%\n" _
84 (int)(100*me->text_cnt/me->cnt + 0.5) _
85 (int)(100*me->lf_cnt /me->cnt + 0.5) _
86 (int)(100*me->ctrl_cnt/me->cnt + 0.5) _
87 (int)(100*me->high_cnt/me->cnt + 0.5));
88 }
89
90 if (!me->ctrl_cnt ||
91 me->text_cnt + me->lf_cnt >= 16 * (me->ctrl_cnt + me->high_cnt)) {
92 char *ptr;
93 /* some kind of text */
94
95 *me->write_ptr = 0; /* terminate buffer */
96
97 if (me->high_cnt > 0)
98 HTResponse_setContentTransferEncoding(response, WWW_CODING_8BIT);
99 else
100 HTResponse_setContentTransferEncoding(response, WWW_CODING_7BIT);
101
102 if (is_html(me->buffer))
103 HTResponse_setFormat(response, HTAtom_for("text/html"));
104
105 else if (!strncmp(me->buffer, "%!", 2))
106 HTResponse_setFormat(response, HTAtom_for("application/postscript"));
107
108 else if (strstr(me->buffer, "#define") &&
109 strstr(me->buffer, "_width") &&
110 strstr(me->buffer, "_bits"))
111 HTResponse_setFormat(response, HTAtom_for("image/x-xbitmap"));
112
113 else if ((ptr = strstr(me->buffer, "converted with BinHex"))!=NULL)
114 HTResponse_setContentTransferEncoding(response, WWW_CODING_MACBINHEX);
115
116 else if (!strncmp(me->buffer, "begin ", 6))
117 HTResponse_setContentTransferEncoding(response, WWW_CODING_BASE64);
118
119 else
120 HTResponse_setFormat(response, WWW_PLAINTEXT);
121 }
122 else {
123 if (!strncmp(me->buffer, "GIF", 3))
124 HTResponse_setFormat(response, WWW_GIF);
125
126 else if (!strncmp(me->buffer, "\377\330\377\340", 4))
127 HTResponse_setFormat(response, WWW_JPEG);
128
129 else if (!strcmp(me->buffer, "MM")) /* MM followed by a zero */
130 HTResponse_setFormat(response, WWW_TIFF);
131
132 else if (!strncmp(me->buffer, "\211PNG\r\n\032\n", 8))
133 HTResponse_setFormat(response, WWW_PNG);
134
135 else if (!strncmp(me->buffer, ".snd", 4))
136 HTResponse_setFormat(response, WWW_AUDIO);
137
138 else if (!strncmp(me->buffer, "\037\235", 2))
139 HTResponse_addEncoding(response, WWW_CODING_COMPRESS);
140
141 else if (!strncmp(me->buffer, "\037\213", 2))
142 HTResponse_addEncoding(response, WWW_CODING_GZIP);
143
144 else
145 HTResponse_setFormat(response, WWW_BINARY);
146 }
147
148 /*
149 ** If we couldn't find any magic tokens then we try and look at the suffix
150 ** of the URL file name and use our own bindings to see if that gives any
151 ** results.
152 */
153 if (HTResponse_format(response) == WWW_UNKNOWN) {
154 HTParentAnchor * anchor = HTRequest_anchor(me->request);
155 char * addr = HTAnchor_physical(anchor);
156 HTTRACE(STREAM_TRACE, "GUESSING.... Hmm - trying local bindings\n");
157 HTBind_getResponseBindings (response, addr);
158 }
159
160 /*
161 ** If nothing worked then give up and say binary...
162 */
163 if (HTResponse_format(response) == WWW_UNKNOWN) {
164 HTTRACE(STREAM_TRACE, "GUESSING.... That's it - I'm giving up!\n");
165 HTResponse_setFormat(response, WWW_BINARY);
166 }
167
168 HTTRACE(STREAM_TRACE, "Guessed..... Content-Type `%s\'\n" _ HTAtom_name(HTResponse_format(response)));
169
170 /*
171 ** Set up the new stream stack with the type we figured out
172 */
173 if ((me->target = HTStreamStack(HTResponse_format(response),
174 me->output_format, me->output_stream,
175 me->request, NO)) == NULL) {
176 HTTRACE(STREAM_TRACE, "HTGuess..... Can't convert media type\n");
177 me->target = HTErrorStream();
178 }
179 me->transparent = YES;
180 return PUT_BLOCK(me->buffer, me->cnt);
181 }
182 return HT_OK;
183 }
184
185
HTGuess_put_block(HTStream * me,const char * b,int l)186 PRIVATE int HTGuess_put_block (HTStream * me, const char * b, int l)
187 {
188 while (!me->transparent && l-- > 0) {
189 int status;
190 if (me->target) {
191 if ((status = HTGuess_flush(me)) != HT_OK)
192 return status;
193 } else {
194 me->cnt++;
195 if (*b == LF)
196 me->lf_cnt++;
197 else if (*b == CR)
198 me->cr_cnt++;
199 else if (*b == 12)
200 me->pg_cnt++;
201 else if (*b =='\t')
202 me->text_cnt++;
203 else if ((unsigned char)*b < 32)
204 me->ctrl_cnt++;
205 else if ((unsigned char)*b < 128)
206 me->text_cnt++;
207 else
208 me->high_cnt++;
209 *me->write_ptr++ = *b++;
210 if (me->cnt >= SAMPLE_SIZE) {
211 if ((status = HTGuess_flush(me)) != HT_OK)
212 return status;
213 }
214 }
215 }
216 if (l > 0)
217 return PUT_BLOCK(b, l);
218 return HT_OK;
219 }
220
HTGuess_put_character(HTStream * me,char c)221 PRIVATE int HTGuess_put_character (HTStream * me, char c)
222 {
223 return HTGuess_put_block(me, &c, 1);
224 }
225
HTGuess_put_string(HTStream * me,const char * s)226 PRIVATE int HTGuess_put_string (HTStream * me, const char * s)
227 {
228 return HTGuess_put_block(me, s, (int) strlen(s));
229 }
230
HTGuess_free(HTStream * me)231 PRIVATE int HTGuess_free (HTStream * me)
232 {
233 int status;
234 if (!me->transparent && (status = HTGuess_flush(me)) != HT_OK)
235 return status;
236 else
237 me->transparent = YES;
238 if ((status = (*me->target->isa->_free)(me->target)) != HT_OK)
239 return status;
240 HT_FREE(me);
241 return HT_OK;
242 }
243
HTGuess_abort(HTStream * me,HTList * e)244 PRIVATE int HTGuess_abort (HTStream * me, HTList * e)
245 {
246 if (me->target) (*me->target->isa->abort)(me->target, e);
247 HT_FREE(me);
248 return HT_ERROR;
249 }
250
251
252 /* Guessing stream
253 ** ---------------
254 */
255 PRIVATE const HTStreamClass HTGuessClass =
256 {
257 "GuessWhat",
258 HTGuess_flush,
259 HTGuess_free,
260 HTGuess_abort,
261 HTGuess_put_character,
262 HTGuess_put_string,
263 HTGuess_put_block
264 };
265
HTGuess_new(HTRequest * request,void * param,HTFormat input_format,HTFormat output_format,HTStream * output_stream)266 PUBLIC HTStream * HTGuess_new (HTRequest * request,
267 void * param,
268 HTFormat input_format,
269 HTFormat output_format,
270 HTStream * output_stream)
271 {
272 HTStream * me;
273 if ((me = (HTStream *) HT_CALLOC(1,sizeof(HTStream))) == NULL)
274 HT_OUTOFMEM("HTGuess_new");
275 me->isa = &HTGuessClass;
276 me->request = request;
277 me->response = HTRequest_response(request);
278 me->output_format = output_format;
279 me->output_stream = output_stream;
280 me->write_ptr = me->buffer;
281 return me;
282 }
283