1 /*
2  * $LynxId: HTFormat.h,v 1.37 2020/01/21 22:02:59 tom Exp $
3  *
4  *                                            HTFormat: The format manager in the WWW Library
5  *                          MANAGE DIFFERENT DOCUMENT FORMATS
6  *
7  * Here we describe the functions of the HTFormat module which handles conversion between
8  * different data representations.  (In MIME parlance, a representation is known as a
9  * content-type.  In WWW the term "format" is often used as it is shorter).
10  *
11  * This module is implemented by HTFormat.c.  This hypertext document is used to generate
12  * the HTFormat.h include file.  Part of the WWW library.
13  */
14 #ifndef HTFORMAT_H
15 #define HTFORMAT_H
16 
17 #include <HTStream.h>
18 #include <HTAtom.h>
19 #include <HTList.h>
20 #include <HTAnchor.h>
21 
22 #ifdef USE_SOURCE_CACHE
23 #include <HTChunk.h>
24 #endif
25 
26 #ifdef USE_BZLIB
27 #include <bzlib.h>
28 #endif
29 
30 #ifdef USE_ZLIB
31 #include <zlib.h>
32 #endif
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 /*
38 
39    These macros (which used to be constants) define some basic internally
40    referenced representations.  The www/xxx ones are of course not MIME
41    standard.
42 
43    www/source is an output format which leaves the input untouched.  It is
44    useful for diagnostics, and for users who want to see the original, whatever
45    it is.
46 
47 																	 */
48 /* Internal ones */
49 /* #define WWW_SOURCE HTAtom_for("www/source") */
50 /* Whatever it was originally */
51     extern HTAtom *WWW_SOURCE;
52     /* calculated once, heavy used */
53 
/*
 * www/present represents the user's perception of the document.  If you
 * convert to www/present, you present the material to the user.
 */
#define WWW_PRESENT     HTAtom_for("www/present")

/*
 * www/debug represents the user's perception of debug information, for
 * example sent as an HTML document in an HTTP redirection message.
 */
#define WWW_DEBUG       HTAtom_for("www/debug")

/*
 * www/mime stands for a MIME message (message/rfc822 format), or a plain
 * text message with no MIME header.  This is what is returned by an HTTP
 * server.
 */
#define WWW_MIME        HTAtom_for("www/mime")

/*
 * For parsing only the header. - kw
 */
#define WWW_MIME_HEAD   HTAtom_for("message/x-rfc822-head")

/*
 * www/print is like www/present except it represents a printed copy.
 */
#define WWW_PRINT       HTAtom_for("www/print")

/*
 * www/unknown is a really unknown type.  Some default action is appropriate.
 */
#define WWW_UNKNOWN     HTAtom_for("www/unknown")

#ifdef DIRED_SUPPORT
/*
 * www/dired signals directory edit mode.
 */
#define WWW_DIRED       HTAtom_for("www/dired")
#endif
103 
/*
 * These are regular MIME types.  HTML is assumed to be added by the W3 code.
 * application/octet-stream was mistakenly application/binary in earlier
 * libwww versions (pre 2.11).
 *
 * The STR_xxx macros give a type name as a plain string; the WWW_xxx macro
 * of the same suffix gives the atom for that string.
 */
#define STR_BINARY      "application/octet-stream"
#define STR_PLAINTEXT   "text/plain"
#define STR_HTML        "text/html"

#define WWW_BINARY      HTAtom_for(STR_BINARY)
#define WWW_PLAINTEXT   HTAtom_for(STR_PLAINTEXT)
#define WWW_HTML        HTAtom_for(STR_HTML)

/* Types for which only the atom form is provided. */
#define WWW_POSTSCRIPT  HTAtom_for("application/postscript")
#define WWW_RICHTEXT    HTAtom_for("application/rtf")
#define WWW_AUDIO       HTAtom_for("audio/basic")
122 
123     typedef HTAtom *HTEncoding;
124 
125 /*
126  * The following are values for the MIME types:
127  */
128 #define WWW_ENC_7BIT            HTAtom_for("7bit")
129 #define WWW_ENC_8BIT            HTAtom_for("8bit")
130 #define WWW_ENC_BINARY          HTAtom_for("binary")
131 
132 /*
133  * We also add
134  */
135 #define WWW_ENC_COMPRESS        HTAtom_for("compress")
136 
/*
 * Does a string designate a real encoding, or is it just a "dummy" (unity)
 * encoding, as for example 7bit, 8bit, and binary?
 *
 * Yields nonzero for a NULL or empty string and for the exact strings
 * "identity", "8bit", "binary" and "7bit" (compared with strcmp, so the
 * match is case-sensitive); yields zero for anything else.
 *
 * The argument is evaluated several times, so it must be free of side
 * effects.
 */
#define IsUnityEncStr(senc) \
        ((senc) == NULL \
         || (senc)[0] == '\0' \
         || strcmp((senc), "identity") == 0 \
         || strcmp((senc), "8bit") == 0 \
         || strcmp((senc), "binary") == 0 \
         || strcmp((senc), "7bit") == 0)
144 
/*
 * The same question as IsUnityEncStr, asked of an encoding atom: nonzero if
 * enc is NULL or is the atom for "identity", "8bit", "binary" or "7bit".
 * Atoms are compared by pointer.  The argument is evaluated several times.
 */
#define IsUnityEnc(enc) \
        ((enc) == NULL \
         || (enc) == HTAtom_for("identity") \
         || (enc) == WWW_ENC_8BIT \
         || (enc) == WWW_ENC_BINARY \
         || (enc) == WWW_ENC_7BIT)
148 
149 /*
150 
151 The HTPresentation and HTConverter types
152 
153    This HTPresentation structure represents a possible conversion algorithm
154    from one format to another.  It includes a pointer to a conversion routine.
155    The conversion routine returns a stream to which data should be fed.  See
156    also HTStreamStack which scans the list of registered converters and calls
157    one.  See the initialisation module for a list of conversion routines.
158 
159  */
160     typedef struct _HTPresentation HTPresentation;
161 
162     typedef HTStream *HTConverter (HTPresentation *pres,
163 				   HTParentAnchor *anchor,
164 				   HTStream *sink);
165 
166     struct _HTPresentation {
167 	HTAtom *rep;		/* representation name atomized */
168 	HTAtom *rep_out;	/* resulting representation */
169 	HTConverter *converter;	/* routine to gen the stream stack */
170 	char *command;		/* MIME-format command string */
171 	char *testcommand;	/* MIME-format test string */
172 	float quality;		/* Between 0 (bad) and 1 (good) */
173 	float secs;
174 	float secs_per_byte;
175 	off_t maxbytes;
176 	BOOL get_accept;	/* list in "Accept:" for GET */
177 	int accept_opt;		/* matches against LYAcceptMedia */
178     };
179 
180 /*
181 
182    The list of presentations is kept by this module.  It is also scanned by
183    modules which want to know the set of formats supported.  for example.
184 
185  */
186     extern HTList *HTPresentations;
187 
188 /*
189 
190    The default presentation is used when no other is appropriate
191 
192  */
193     extern HTPresentation *default_presentation;
194 
/*
 * Options used for "Content-Type" string.  The values are consecutive,
 * starting from zero.
 */
    typedef enum {
        contentBINARY = 0,
        contentTEXT,
        contentHTML
    } ContentType;
203 
/*
 * Options used for "Accept:" string.
 *
 * The basic components are distinct powers of two, so that they can be
 * combined; the remaining values are such combinations.
 */
    typedef enum {
        /* basic components */
        mediaINT = 1,           /* internal types predefined in HTInit.c */
        mediaEXT = 2,           /* external types predefined in HTInit.c */
        mediaCFG = 4,           /* types, e.g., viewers, from lynx.cfg */
        mediaUSR = 8,           /* user's mime-types, etc. */
        mediaSYS = 16,          /* system's mime-types, etc. */

        /* these are useful flavors for the options menu */
        mediaOpt1 = mediaINT,
        mediaOpt2 = mediaINT | mediaCFG,
        mediaOpt3 = mediaINT | mediaCFG | mediaUSR,
        mediaOpt4 = mediaINT | mediaCFG | mediaUSR | mediaSYS,

        /* this is the flavor from pre-2.8.6 */
        mediaALL = mediaINT | mediaEXT | mediaCFG | mediaUSR | mediaSYS
    } AcceptMedia;
222 
/*
 * Options used for "Accept-Encoding:" string.
 *
 * Each encoding has its own bit; encodingALL is all of them combined.
 */
    typedef enum {
        encodingNONE = 0,
        encodingGZIP = 1,
        encodingDEFLATE = 2,
        encodingCOMPRESS = 4,
        encodingBZIP2 = 8,
        encodingALL = (encodingGZIP | encodingDEFLATE | encodingCOMPRESS | encodingBZIP2)
    } AcceptEncoding;
237 
238 /*
239 
240 HTSetPresentation: Register a system command to present a format
241 
242   ON ENTRY,
243 
244   rep                     is the MIME - style format name
245 
246   command                 is the MAILCAP - style command template
247 
248   testcommand             is the MAILCAP - style testcommand template
249 
250   quality                 A degradation faction 0..1.0
251 
252   secs                    A limit on the time user will wait (0.0 for infinity)
253   secs_per_byte
254 
255   maxbytes                A limit on the length acceptable as input (0 infinite)
256 
257   media                   Used in filtering presentation types for "Accept:"
258 
259  */
260     extern void HTSetPresentation(const char *representation,
261 				  const char *command,
262 				  const char *testcommand,
263 				  double quality,
264 				  double secs,
265 				  double secs_per_byte,
266 				  long int maxbytes,
267 				  AcceptMedia media
268     );
269 
270 /*
271 
272 HTSetConversion:   Register a conversion routine
273 
274   ON ENTRY,
275 
276   rep_in                  is the content-type input
277 
278   rep_out                 is the resulting content-type
279 
280   converter               is the routine to make the stream to do it
281 
282  */
283 
284     extern void HTSetConversion(const char *rep_in,
285 				const char *rep_out,
286 				HTConverter *converter,
287 				double quality,
288 				double secs,
289 				double secs_per_byte,
290 				long int maxbytes,
291 				AcceptMedia media
292     );
293 
294 /*
295 
296 HTStreamStack:   Create a stack of streams
297 
298    This is the routine which actually sets up the conversion.  It currently
299    checks only for direct conversions, but multi-stage conversions are forseen.
300    It takes a stream into which the output should be sent in the final format,
301    builds the conversion stack, and returns a stream into which the data in the
302    input format should be fed.  The anchor is passed because hypertxet objects
303    load information into the anchor object which represents them.
304 
305  */
306     extern HTStream *HTStreamStack(HTFormat format_in,
307 				   HTFormat format_out,
308 				   HTStream *stream_out,
309 				   HTParentAnchor *anchor);
310 
311 /*
312 HTReorderPresentation: put presentation near head of list
313 
314     Look up a presentation (exact match only) and, if found, reorder it to the
315     start of the HTPresentations list.  - kw
316     */
317 
318     extern void HTReorderPresentation(HTFormat format_in,
319 				      HTFormat format_out);
320 
/*
 * HTFilterPresentations: set up the 'get_accept' flag
 *
 * The flag denotes presentations that are not redundant, and which will be
 * listed in the "Accept:" header.
 */
    void HTFilterPresentations(void);
326 
327 /*
328 
329 HTStackValue: Find the cost of a filter stack
330 
331    Must return the cost of the same stack which HTStreamStack would set up.
332 
333   ON ENTRY,
334 
335   format_in               The format of the data to be converted
336 
337   format_out              The format required
338 
339   initial_value           The intrinsic "value" of the data before conversion on a scale
340                          from 0 to 1
341 
342   length                  The number of bytes expected in the input format
343 
344  */
345     extern float HTStackValue(HTFormat format_in,
346 			      HTFormat rep_out,
347 			      double initial_value,
348 			      long int length);
349 
/*
 * Returned if none found.
 *
 * The replacement list is parenthesized so that the sign can never combine
 * with neighbouring tokens at the point of use.
 *
 * Note that this is a double constant, while HTStackValue is declared to
 * return float: a float holding this value does not compare equal to the
 * bare macro after promotion to double.  Compare against
 * (float) NO_VALUE_FOUND, or use an ordering test, instead.
 */
#define NO_VALUE_FOUND  (-1e20)
351 
/*
 * Display the page while transfer in progress
 * -------------------------------------------
 *
 * Repaint the page only when necessary.
 * This is a traverse call for HText_pageDisplay() - it works!
 */
    void HTDisplayPartial(void);

    /* Companion of HTDisplayPartial; not described further in this header. */
    void HTFinishDisplayPartial(void);
362 
363 /*
364 
365 HTCopy:  Copy a socket to a stream
366 
367    This is used by the protocol engines to send data down a stream, typically
368    one which has been generated by HTStreamStack.
369 
370  */
371     extern int HTCopy(HTParentAnchor *anchor,
372 		      int file_number,
373 		      void *handle,
374 		      HTStream *sink);
375 
376 /*
377 
378 HTFileCopy:  Copy a file to a stream
379 
380    This is used by the protocol engines to send data down a stream, typically
381    one which has been generated by HTStreamStack.  It is currently called by
382    HTParseFile
383 
384  */
385     extern int HTFileCopy(FILE *fp,
386 			  HTStream *sink);
387 
388 #ifdef USE_SOURCE_CACHE
389 /*
390 
391 HTMemCopy:  Copy a memory chunk to a stream
392 
393    This is used by the protocol engines to send data down a stream, typically
394    one which has been generated by HTStreamStack.  It is currently called by
395    HTParseMem
396 
397  */
398     extern int HTMemCopy(HTChunk *chunk,
399 			 HTStream *sink);
400 #endif
401 
402 /*
403 
404 HTCopyNoCR: Copy a socket to a stream, stripping CR characters.
405 
406    It is slower than HTCopy .
407 
408  */
409 
410     extern void HTCopyNoCR(HTParentAnchor *anchor,
411 			   int file_number,
412 			   HTStream *sink);
413 
/*
 * HTInitInput: Clear input buffer and set file number
 *
 * This routine and HTGetCharacter provide simple character input from
 * sockets.  (They are left over from the older architecture and may not be
 * used very much.)  The existence of a common routine and buffer saves
 * memory space in small implementations.
 */
    void HTInitInput(int file_number);
425 
/*
 * HTGetCharacter: Get next character from buffer
 *
 * NOTE(review): interrupted_in_htgetcharacter is presumably set when
 * HTGetCharacter is interrupted; this header does not say so - confirm
 * against HTFormat.c.
 */
    extern int interrupted_in_htgetcharacter;
    int HTGetCharacter(void);
433 
434 /*
435 
436 HTParseSocket: Parse a socket given its format
437 
438    This routine is called by protocol modules to load an object.  uses
439    HTStreamStack and the copy routines above.  Returns HT_LOADED if successful,
440    <0 if not.
441 
442  */
443     extern int HTParseSocket(HTFormat format_in,
444 			     HTFormat format_out,
445 			     HTParentAnchor *anchor,
446 			     int file_number,
447 			     HTStream *sink);
448 
449 /*
450 
451 HTParseFile: Parse a File through a file pointer
452 
453    This routine is called by protocols modules to load an object.  uses
454    HTStreamStack and HTFileCopy.  Returns HT_LOADED if successful, can also
455    return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
456 
457  */
458     extern int HTParseFile(HTFormat format_in,
459 			   HTFormat format_out,
460 			   HTParentAnchor *anchor,
461 			   FILE *fp,
462 			   HTStream *sink);
463 
464 #ifdef USE_SOURCE_CACHE
465 /*
466 
467 HTParseMem: Parse a document in memory
468 
469    This routine is called by protocols modules to load an object.  uses
470    HTStreamStack and HTMemCopy.  Returns HT_LOADED if successful, can also
471    return <0 for failure.
472 
473  */
474     extern int HTParseMem(HTFormat format_in,
475 			  HTFormat format_out,
476 			  HTParentAnchor *anchor,
477 			  HTChunk *chunk,
478 			  HTStream *sink);
479 #endif
480 
481 #ifdef USE_ZLIB
482 /*
483 HTParseGzFile: Parse a gzip'ed File through a file pointer
484 
485    This routine is called by protocols modules to load an object.  uses
486    HTStreamStack and HTGzFileCopy.  Returns HT_LOADED if successful, can also
487    return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
488  */
489     extern int HTParseGzFile(HTFormat format_in,
490 			     HTFormat format_out,
491 			     HTParentAnchor *anchor,
492 			     gzFile gzfp,
493 			     HTStream *sink);
494 
495 /*
496 HTParseZzFile: Parse a deflate'd File through a file pointer
497 
498    This routine is called by protocols modules to load an object.  uses
499    HTStreamStack and HTZzFileCopy.  Returns HT_LOADED if successful, can also
500    return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
501  */
502     extern int HTParseZzFile(HTFormat format_in,
503 			     HTFormat format_out,
504 			     HTParentAnchor *anchor,
505 			     FILE *zzfp,
506 			     HTStream *sink);
507 
508 #endif				/* USE_ZLIB */
509 
510 #ifdef USE_BZLIB
511 /*
512 HTParseBzFile: Parse a bzip2'ed File through a file pointer
513 
514    This routine is called by protocols modules to load an object.  uses
515    HTStreamStack and HTGzFileCopy.  Returns HT_LOADED if successful, can also
516    return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
517  */
518     extern int HTParseBzFile(HTFormat format_in,
519 			     HTFormat format_out,
520 			     HTParentAnchor *anchor,
521 			     BZFILE * bzfp,
522 			     HTStream *sink);
523 
524 #endif				/* USE_BZLIB */
525 
526 /*
527 
528 HTNetToText: Convert Net ASCII to local representation
529 
530    This is a filter stream suitable for taking text from a socket and passing
531    it into a stream which expects text in the local C representation.  It does
532    ASCII and newline conversion.  As usual, pass its output stream to it when
533    creating it.
534 
535  */
536     extern HTStream *HTNetToText(HTStream *sink);
537 
/*
 * HTFormatInit: Set up default presentations and conversions
 *
 * These are defined in HTInit.c, or in HTSInit.c if these have been
 * replaced.  If you don't call this routine, and you don't define any
 * presentations, then this routine will automatically be called the first
 * time a conversion is needed.  However, if you explicitly add some
 * conversions (e.g., using HTLoadRules) then you may also want to call this
 * explicitly, to get the defaults as well.
 */
    void HTFormatInit(void);
551 
552 /*
553 
554 Epilogue
555 
556  */
557     extern BOOL HTOutputSource;	/* Flag: shortcut parser */
558 
559 #ifdef __cplusplus
560 }
561 #endif
562 #endif				/* HTFORMAT_H */
563