1 /*								      HTGuess.c
2 **	STREAM TO GUESS CONTENT-TYPE
3 **
4 **	(c) COPYRIGHT MIT 1995.
5 **	Please first read the full copyright statement in the file COPYRIGH.
6 **	@(#) $Id$
7 **
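**	This stream buffers up to SAMPLE_SIZE bytes of its input, guesses the
**	content type (and, where recognizable, the encoding) from that sample,
**	records the guess on the response object, and then passes the buffered
**	sample and all further input on to a stream stack built for that type.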
11 **
12 ** HISTORY:
13 **	 8 Jul 94  FM	Insulate free() from _free structure element.
14 **
15 */
16 
17 /* Library include files */
18 #include "wwwsys.h"
19 #include "WWWUtil.h"
20 #include "WWWLib.h"
21 #include "WWWFile.h"
22 #include "HTGuess.h"
23 
24 #define SAMPLE_SIZE	200	/* Number of chars to look at */
25 
26 /*		Stream Object
27 **		------------
28 */
29 
30 struct _HTStream {
31 	const HTStreamClass *	isa;
32 
33 	HTRequest *		request;
34         HTResponse *		response;
35 	HTFormat		output_format;
36 	HTStream *		output_stream;
37 	HTStream *		target;
38 
39 	BOOL			transparent;
40 	int			cnt;
41 	int			text_cnt;
42 	int			lf_cnt;
43 	int			cr_cnt;
44 	int			pg_cnt;
45 	int			ctrl_cnt;
46 	int			high_cnt;
47 	char *			write_ptr;
48 	char			buffer[ SAMPLE_SIZE + 1 ];
49 };
50 
/*
**	Shorthands for calling the downstream target's output methods.
**	They expect a variable `me' (HTStream *) with a non-NULL target to be
**	in scope at the point of use.
*/
#define PUT_CHAR(c)	((me->target->isa->put_character)(me->target, (c)))
#define PUT_STRING(s)	((me->target->isa->put_string)(me->target, (s)))
#define PUT_BLOCK(b,l)	((me->target->isa->put_block)(me->target, (b), (l)))
54 
55 /* ------------------------------------------------------------------------- */
56 
is_html(char * buf)57 PRIVATE BOOL is_html (char * buf)
58 {
59     char * p = strchr(buf,'<');
60 
61     if (p && (!strncasecomp(p, "<HTML>", 6) ||
62 	      !strncasecomp(p, "<!DOCTYPE HTML", 13) ||
63 	      !strncasecomp(p, "<HEAD", 5) ||
64 	      !strncasecomp(p, "<TITLE>", 7) ||
65 	      !strncasecomp(p, "<BODY>", 6) ||
66 	      !strncasecomp(p, "<PLAINTEXT>", 11) ||
67 	      (p[0]=='<' && TOUPPER(p[1]) == 'H' && p[3]=='>')))
68 	return YES;
69     else
70 	return NO;
71 }
72 
HTGuess_flush(HTStream * me)73 PRIVATE int HTGuess_flush (HTStream * me)
74 {
75     if (!me->transparent) {
76 	HTResponse * response = me->response;
77 
78 	/*
79 	**  First we look for magic tokens and evaluate the contents of the buffer
80 	**  that we are investigating.
81 	*/
82 	if (me->cnt) {
83 	    HTTRACE(STREAM_TRACE, "GUESSING.... Result of content analysis: Text=%d%% Newlines=%d%% Ctrl=%d%% High=%d%%\n" _
84 			(int)(100*me->text_cnt/me->cnt + 0.5) _
85 			(int)(100*me->lf_cnt  /me->cnt + 0.5) _
86 			(int)(100*me->ctrl_cnt/me->cnt + 0.5) _
87 			(int)(100*me->high_cnt/me->cnt + 0.5));
88 	}
89 
90 	if (!me->ctrl_cnt ||
91 	    me->text_cnt + me->lf_cnt >= 16 * (me->ctrl_cnt + me->high_cnt)) {
92 	    char *ptr;
93 	    /* some kind of text */
94 
95 	    *me->write_ptr = 0;	/* terminate buffer */
96 
97 	    if (me->high_cnt > 0)
98 		HTResponse_setContentTransferEncoding(response, WWW_CODING_8BIT);
99 	    else
100 		HTResponse_setContentTransferEncoding(response, WWW_CODING_7BIT);
101 
102 	    if (is_html(me->buffer))
103 		HTResponse_setFormat(response, HTAtom_for("text/html"));
104 
105 	    else if (!strncmp(me->buffer, "%!", 2))
106 		HTResponse_setFormat(response, HTAtom_for("application/postscript"));
107 
108 	    else if (strstr(me->buffer, "#define") &&
109 		     strstr(me->buffer, "_width") &&
110 		     strstr(me->buffer, "_bits"))
111 		HTResponse_setFormat(response, HTAtom_for("image/x-xbitmap"));
112 
113 	    else if ((ptr = strstr(me->buffer, "converted with BinHex"))!=NULL)
114 		HTResponse_setContentTransferEncoding(response, WWW_CODING_MACBINHEX);
115 
116 	    else if (!strncmp(me->buffer, "begin ", 6))
117 		HTResponse_setContentTransferEncoding(response, WWW_CODING_BASE64);
118 
119 	    else
120 		HTResponse_setFormat(response, WWW_PLAINTEXT);
121 	}
122 	else {
123 	    if (!strncmp(me->buffer, "GIF", 3))
124 		HTResponse_setFormat(response, WWW_GIF);
125 
126 	    else if (!strncmp(me->buffer, "\377\330\377\340", 4))
127 		HTResponse_setFormat(response, WWW_JPEG);
128 
129 	    else if (!strcmp(me->buffer, "MM"))	/* MM followed by a zero */
130 		HTResponse_setFormat(response, WWW_TIFF);
131 
132  	    else if (!strncmp(me->buffer, "\211PNG\r\n\032\n", 8))
133  		HTResponse_setFormat(response, WWW_PNG);
134 
135 	    else if (!strncmp(me->buffer, ".snd", 4))
136 		HTResponse_setFormat(response, WWW_AUDIO);
137 
138 	    else if (!strncmp(me->buffer, "\037\235", 2))
139 		HTResponse_addEncoding(response, WWW_CODING_COMPRESS);
140 
141 	    else if (!strncmp(me->buffer, "\037\213", 2))
142 		HTResponse_addEncoding(response, WWW_CODING_GZIP);
143 
144 	    else
145 		HTResponse_setFormat(response, WWW_BINARY);
146 	}
147 
148 	/*
149 	**  If we couldn't find any magic tokens then we try and look at the suffix
150 	**  of the URL file name and use our own bindings to see if that gives any
151 	**  results.
152 	*/
153 	if (HTResponse_format(response) == WWW_UNKNOWN) {
154 	    HTParentAnchor * anchor = HTRequest_anchor(me->request);
155 	    char * addr = HTAnchor_physical(anchor);
156 	    HTTRACE(STREAM_TRACE, "GUESSING.... Hmm - trying local bindings\n");
157 	    HTBind_getResponseBindings (response, addr);
158 	}
159 
160 	/*
161 	**  If nothing worked then give up and say binary...
162 	*/
163 	if (HTResponse_format(response) == WWW_UNKNOWN) {
164 	    HTTRACE(STREAM_TRACE, "GUESSING.... That's it - I'm giving up!\n");
165 	    HTResponse_setFormat(response, WWW_BINARY);
166 	}
167 
168 	HTTRACE(STREAM_TRACE, "Guessed..... Content-Type `%s\'\n" _ HTAtom_name(HTResponse_format(response)));
169 
170 	/*
171 	**  Set up the new stream stack with the type we figured out
172 	*/
173 	if ((me->target = HTStreamStack(HTResponse_format(response),
174 					me->output_format, me->output_stream,
175 					me->request, NO)) == NULL) {
176 	    HTTRACE(STREAM_TRACE, "HTGuess..... Can't convert media type\n");
177 	    me->target = HTErrorStream();
178 	}
179 	me->transparent = YES;
180 	return PUT_BLOCK(me->buffer, me->cnt);
181     }
182     return HT_OK;
183 }
184 
185 
HTGuess_put_block(HTStream * me,const char * b,int l)186 PRIVATE int HTGuess_put_block (HTStream * me, const char * b, int l)
187 {
188     while (!me->transparent && l-- > 0) {
189 	int status;
190 	if (me->target) {
191 	    if ((status = HTGuess_flush(me)) != HT_OK)
192 		return status;
193 	} else {
194 	    me->cnt++;
195 	    if (*b == LF)
196 		me->lf_cnt++;
197 	    else if (*b == CR)
198 		me->cr_cnt++;
199 	    else if (*b == 12)
200 		me->pg_cnt++;
201 	    else if (*b =='\t')
202 		me->text_cnt++;
203 	    else if ((unsigned char)*b < 32)
204 		me->ctrl_cnt++;
205 	    else if ((unsigned char)*b < 128)
206 		me->text_cnt++;
207 	    else
208 		me->high_cnt++;
209 	    *me->write_ptr++ = *b++;
210 	    if (me->cnt >= SAMPLE_SIZE) {
211 		if ((status = HTGuess_flush(me)) != HT_OK)
212 		    return status;
213 	    }
214 	}
215     }
216     if (l > 0)
217 	return PUT_BLOCK(b, l);
218     return HT_OK;
219 }
220 
HTGuess_put_character(HTStream * me,char c)221 PRIVATE int HTGuess_put_character (HTStream * me, char c)
222 {
223     return HTGuess_put_block(me, &c, 1);
224 }
225 
HTGuess_put_string(HTStream * me,const char * s)226 PRIVATE int HTGuess_put_string (HTStream * me, const char * s)
227 {
228     return HTGuess_put_block(me, s, (int) strlen(s));
229 }
230 
HTGuess_free(HTStream * me)231 PRIVATE int HTGuess_free (HTStream * me)
232 {
233     int status;
234     if (!me->transparent && (status = HTGuess_flush(me)) != HT_OK)
235 	return status;
236     else
237 	me->transparent = YES;
238     if ((status = (*me->target->isa->_free)(me->target)) != HT_OK)
239 	return status;
240     HT_FREE(me);
241     return HT_OK;
242 }
243 
HTGuess_abort(HTStream * me,HTList * e)244 PRIVATE int HTGuess_abort (HTStream * me, HTList * e)
245 {
246     if (me->target) (*me->target->isa->abort)(me->target, e);
247     HT_FREE(me);
248     return HT_ERROR;
249 }
250 
251 
252 /*	Guessing stream
253 **	---------------
254 */
255 PRIVATE const HTStreamClass HTGuessClass =
256 {
257 	"GuessWhat",
258 	HTGuess_flush,
259 	HTGuess_free,
260 	HTGuess_abort,
261 	HTGuess_put_character,
262  	HTGuess_put_string,
263 	HTGuess_put_block
264 };
265 
HTGuess_new(HTRequest * request,void * param,HTFormat input_format,HTFormat output_format,HTStream * output_stream)266 PUBLIC HTStream * HTGuess_new (HTRequest *	request,
267 			       void *		param,
268 			       HTFormat		input_format,
269 			       HTFormat		output_format,
270 			       HTStream *	output_stream)
271 {
272     HTStream * me;
273     if ((me = (HTStream  *) HT_CALLOC(1,sizeof(HTStream))) == NULL)
274         HT_OUTOFMEM("HTGuess_new");
275     me->isa = &HTGuessClass;
276     me->request = request;
277     me->response = HTRequest_response(request);
278     me->output_format = output_format;
279     me->output_stream = output_stream;
280     me->write_ptr = me->buffer;
281     return me;
282 }
283