1 /* 2 * $LynxId: HTFormat.h,v 1.37 2020/01/21 22:02:59 tom Exp $ 3 * 4 * HTFormat: The format manager in the WWW Library 5 * MANAGE DIFFERENT DOCUMENT FORMATS 6 * 7 * Here we describe the functions of the HTFormat module which handles conversion between 8 * different data representations. (In MIME parlance, a representation is known as a 9 * content-type. In WWW the term "format" is often used as it is shorter). 10 * 11 * This module is implemented by HTFormat.c. This hypertext document is used to generate 12 * the HTFormat.h include file. Part of the WWW library. 13 */ 14 #ifndef HTFORMAT_H 15 #define HTFORMAT_H 16 17 #include <HTStream.h> 18 #include <HTAtom.h> 19 #include <HTList.h> 20 #include <HTAnchor.h> 21 22 #ifdef USE_SOURCE_CACHE 23 #include <HTChunk.h> 24 #endif 25 26 #ifdef USE_BZLIB 27 #include <bzlib.h> 28 #endif 29 30 #ifdef USE_ZLIB 31 #include <zlib.h> 32 #endif 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 /* 38 39 These macros (which used to be constants) define some basic internally 40 referenced representations. The www/xxx ones are of course not MIME 41 standard. 42 43 www/source is an output format which leaves the input untouched. It is 44 useful for diagnostics, and for users who want to see the original, whatever 45 it is. 46 47 */ 48 /* Internal ones */ 49 /* #define WWW_SOURCE HTAtom_for("www/source") */ 50 /* Whatever it was originally */ 51 extern HTAtom *WWW_SOURCE; 52 /* calculated once, heavy used */ 53 54 /* 55 56 www/present represents the user's perception of the document. If you 57 convert to www/present, you present the material to the user. 58 59 */ 60 #define WWW_PRESENT HTAtom_for("www/present") /* The user's perception */ 61 62 #define WWW_DEBUG HTAtom_for("www/debug") 63 /* 64 65 WWW_DEBUG represents the user's perception of debug information, for example 66 sent as a HTML document in a HTTP redirection message. 67 68 */ 69 70 /* 71 72 The message/rfc822 format means a MIME message or a plain text message with 73 no MIME header. This is what is returned by an HTTP server. 74 75 */ 76 #define WWW_MIME HTAtom_for("www/mime") /* A MIME message */ 77 78 /* 79 For parsing only the header. - kw 80 */ 81 #define WWW_MIME_HEAD HTAtom_for("message/x-rfc822-head") 82 83 /* 84 85 www/print is like www/present except it represents a printed copy. 86 87 */ 88 #define WWW_PRINT HTAtom_for("www/print") /* A printed copy */ 89 90 /* 91 92 www/unknown is a really unknown type. Some default action is appropriate. 93 94 */ 95 #define WWW_UNKNOWN HTAtom_for("www/unknown") 96 97 #ifdef DIRED_SUPPORT 98 /* 99 www/dired signals directory edit mode. 100 */ 101 #define WWW_DIRED HTAtom_for("www/dired") 102 #endif 103 104 /* 105 106 These are regular MIME types. HTML is assumed to be added by the W3 code. 107 application/octet-stream was mistakenly application/binary in earlier libwww 108 versions (pre 2.11). 109 110 */ 111 #define STR_BINARY "application/octet-stream" 112 #define STR_PLAINTEXT "text/plain" 113 #define STR_HTML "text/html" 114 115 #define WWW_BINARY HTAtom_for(STR_BINARY) 116 #define WWW_PLAINTEXT HTAtom_for(STR_PLAINTEXT) 117 #define WWW_HTML HTAtom_for(STR_HTML) 118 119 #define WWW_POSTSCRIPT HTAtom_for("application/postscript") 120 #define WWW_RICHTEXT HTAtom_for("application/rtf") 121 #define WWW_AUDIO HTAtom_for("audio/basic") 122 123 typedef HTAtom *HTEncoding; 124 125 /* 126 * The following are values for the MIME types: 127 */ 128 #define WWW_ENC_7BIT HTAtom_for("7bit") 129 #define WWW_ENC_8BIT HTAtom_for("8bit") 130 #define WWW_ENC_BINARY HTAtom_for("binary") 131 132 /* 133 * We also add 134 */ 135 #define WWW_ENC_COMPRESS HTAtom_for("compress") 136 137 /* 138 * Does a string designate a real encoding, or is it just 139 * a "dummy" as for example 7bit, 8bit, and binary? 140 */ 141 #define IsUnityEncStr(senc) \ 142 ((senc)==NULL || *(senc)=='\0' || !strcmp(senc,"identity") ||\ 143 !strcmp(senc,"8bit") || !strcmp(senc,"binary") || !strcmp(senc,"7bit")) 144 145 #define IsUnityEnc(enc) \ 146 ((enc)==NULL || (enc)==HTAtom_for("identity") ||\ 147 (enc)==WWW_ENC_8BIT || (enc)==WWW_ENC_BINARY || (enc)==WWW_ENC_7BIT) 148 149 /* 150 151 The HTPresentation and HTConverter types 152 153 This HTPresentation structure represents a possible conversion algorithm 154 from one format to another. It includes a pointer to a conversion routine. 155 The conversion routine returns a stream to which data should be fed. See 156 also HTStreamStack which scans the list of registered converters and calls 157 one. See the initialisation module for a list of conversion routines. 158 159 */ 160 typedef struct _HTPresentation HTPresentation; 161 162 typedef HTStream *HTConverter (HTPresentation *pres, 163 HTParentAnchor *anchor, 164 HTStream *sink); 165 166 struct _HTPresentation { 167 HTAtom *rep; /* representation name atomized */ 168 HTAtom *rep_out; /* resulting representation */ 169 HTConverter *converter; /* routine to gen the stream stack */ 170 char *command; /* MIME-format command string */ 171 char *testcommand; /* MIME-format test string */ 172 float quality; /* Between 0 (bad) and 1 (good) */ 173 float secs; 174 float secs_per_byte; 175 off_t maxbytes; 176 BOOL get_accept; /* list in "Accept:" for GET */ 177 int accept_opt; /* matches against LYAcceptMedia */ 178 }; 179 180 /* 181 182 The list of presentations is kept by this module. It is also scanned by 183 modules which want to know the set of formats supported. for example. 184 185 */ 186 extern HTList *HTPresentations; 187 188 /* 189 190 The default presentation is used when no other is appropriate 191 192 */ 193 extern HTPresentation *default_presentation; 194 195 /* 196 * Options used for "Content-Type" string 197 */ 198 typedef enum { 199 contentBINARY = 0 200 ,contentTEXT 201 ,contentHTML 202 } ContentType; 203 204 /* 205 * Options used for "Accept:" string 206 */ 207 typedef enum { 208 /* make the components powers of two so we can add them */ 209 mediaINT = 1 /* internal types predefined in HTInit.c */ 210 ,mediaEXT = 2 /* external types predefined in HTInit.c */ 211 ,mediaCFG = 4 /* types, e.g., viewers, from lynx.cfg */ 212 ,mediaUSR = 8 /* user's mime-types, etc. */ 213 ,mediaSYS = 16 /* system's mime-types, etc. */ 214 /* these are useful flavors for the options menu */ 215 ,mediaOpt1 = mediaINT 216 ,mediaOpt2 = mediaINT + mediaCFG 217 ,mediaOpt3 = mediaINT + mediaCFG + mediaUSR 218 ,mediaOpt4 = mediaINT + mediaCFG + mediaUSR + mediaSYS 219 /* this is the flavor from pre-2.8.6 */ 220 ,mediaALL = mediaINT + mediaEXT + mediaCFG + mediaUSR + mediaSYS 221 } AcceptMedia; 222 223 /* 224 * Options used for "Accept-Encoding:" string 225 */ 226 typedef enum { 227 encodingNONE = 0 228 ,encodingGZIP = 1 229 ,encodingDEFLATE = 2 230 ,encodingCOMPRESS = 4 231 ,encodingBZIP2 = 8 232 ,encodingALL = (encodingGZIP 233 + encodingDEFLATE 234 + encodingCOMPRESS 235 + encodingBZIP2) 236 } AcceptEncoding; 237 238 /* 239 240 HTSetPresentation: Register a system command to present a format 241 242 ON ENTRY, 243 244 rep is the MIME - style format name 245 246 command is the MAILCAP - style command template 247 248 testcommand is the MAILCAP - style testcommand template 249 250 quality A degradation faction 0..1.0 251 252 secs A limit on the time user will wait (0.0 for infinity) 253 secs_per_byte 254 255 maxbytes A limit on the length acceptable as input (0 infinite) 256 257 media Used in filtering presentation types for "Accept:" 258 259 */ 260 extern void HTSetPresentation(const char *representation, 261 const char *command, 262 const char *testcommand, 263 double quality, 264 double secs, 265 double secs_per_byte, 266 long int maxbytes, 267 AcceptMedia media 268 ); 269 270 /* 271 272 HTSetConversion: Register a conversion routine 273 274 ON ENTRY, 275 276 rep_in is the content-type input 277 278 rep_out is the resulting content-type 279 280 converter is the routine to make the stream to do it 281 282 */ 283 284 extern void HTSetConversion(const char *rep_in, 285 const char *rep_out, 286 HTConverter *converter, 287 double quality, 288 double secs, 289 double secs_per_byte, 290 long int maxbytes, 291 AcceptMedia media 292 ); 293 294 /* 295 296 HTStreamStack: Create a stack of streams 297 298 This is the routine which actually sets up the conversion. It currently 299 checks only for direct conversions, but multi-stage conversions are forseen. 300 It takes a stream into which the output should be sent in the final format, 301 builds the conversion stack, and returns a stream into which the data in the 302 input format should be fed. The anchor is passed because hypertxet objects 303 load information into the anchor object which represents them. 304 305 */ 306 extern HTStream *HTStreamStack(HTFormat format_in, 307 HTFormat format_out, 308 HTStream *stream_out, 309 HTParentAnchor *anchor); 310 311 /* 312 HTReorderPresentation: put presentation near head of list 313 314 Look up a presentation (exact match only) and, if found, reorder it to the 315 start of the HTPresentations list. - kw 316 */ 317 318 extern void HTReorderPresentation(HTFormat format_in, 319 HTFormat format_out); 320 321 /* 322 * Setup 'get_accept' flag to denote presentations that are not redundant, 323 * and will be listed in "Accept:" header. 324 */ 325 extern void HTFilterPresentations(void); 326 327 /* 328 329 HTStackValue: Find the cost of a filter stack 330 331 Must return the cost of the same stack which HTStreamStack would set up. 332 333 ON ENTRY, 334 335 format_in The format of the data to be converted 336 337 format_out The format required 338 339 initial_value The intrinsic "value" of the data before conversion on a scale 340 from 0 to 1 341 342 length The number of bytes expected in the input format 343 344 */ 345 extern float HTStackValue(HTFormat format_in, 346 HTFormat rep_out, 347 double initial_value, 348 long int length); 349 350 #define NO_VALUE_FOUND -1e20 /* returned if none found */ 351 352 /* Display the page while transfer in progress 353 * ------------------------------------------- 354 * 355 * Repaint the page only when necessary. 356 * This is a traverse call for HText_pageDispaly() - it works!. 357 * 358 */ 359 extern void HTDisplayPartial(void); 360 361 extern void HTFinishDisplayPartial(void); 362 363 /* 364 365 HTCopy: Copy a socket to a stream 366 367 This is used by the protocol engines to send data down a stream, typically 368 one which has been generated by HTStreamStack. 369 370 */ 371 extern int HTCopy(HTParentAnchor *anchor, 372 int file_number, 373 void *handle, 374 HTStream *sink); 375 376 /* 377 378 HTFileCopy: Copy a file to a stream 379 380 This is used by the protocol engines to send data down a stream, typically 381 one which has been generated by HTStreamStack. It is currently called by 382 HTParseFile 383 384 */ 385 extern int HTFileCopy(FILE *fp, 386 HTStream *sink); 387 388 #ifdef USE_SOURCE_CACHE 389 /* 390 391 HTMemCopy: Copy a memory chunk to a stream 392 393 This is used by the protocol engines to send data down a stream, typically 394 one which has been generated by HTStreamStack. It is currently called by 395 HTParseMem 396 397 */ 398 extern int HTMemCopy(HTChunk *chunk, 399 HTStream *sink); 400 #endif 401 402 /* 403 404 HTCopyNoCR: Copy a socket to a stream, stripping CR characters. 405 406 It is slower than HTCopy . 407 408 */ 409 410 extern void HTCopyNoCR(HTParentAnchor *anchor, 411 int file_number, 412 HTStream *sink); 413 414 /* 415 416 Clear input buffer and set file number 417 418 This routine and the one below provide simple character input from sockets. 419 (They are left over from the older architecture and may not be used very 420 much.) The existence of a common routine and buffer saves memory space in 421 small implementations. 422 423 */ 424 extern void HTInitInput(int file_number); 425 426 /* 427 428 Get next character from buffer 429 430 */ 431 extern int interrupted_in_htgetcharacter; 432 extern int HTGetCharacter(void); 433 434 /* 435 436 HTParseSocket: Parse a socket given its format 437 438 This routine is called by protocol modules to load an object. uses 439 HTStreamStack and the copy routines above. Returns HT_LOADED if successful, 440 <0 if not. 441 442 */ 443 extern int HTParseSocket(HTFormat format_in, 444 HTFormat format_out, 445 HTParentAnchor *anchor, 446 int file_number, 447 HTStream *sink); 448 449 /* 450 451 HTParseFile: Parse a File through a file pointer 452 453 This routine is called by protocols modules to load an object. uses 454 HTStreamStack and HTFileCopy. Returns HT_LOADED if successful, can also 455 return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 456 457 */ 458 extern int HTParseFile(HTFormat format_in, 459 HTFormat format_out, 460 HTParentAnchor *anchor, 461 FILE *fp, 462 HTStream *sink); 463 464 #ifdef USE_SOURCE_CACHE 465 /* 466 467 HTParseMem: Parse a document in memory 468 469 This routine is called by protocols modules to load an object. uses 470 HTStreamStack and HTMemCopy. Returns HT_LOADED if successful, can also 471 return <0 for failure. 472 473 */ 474 extern int HTParseMem(HTFormat format_in, 475 HTFormat format_out, 476 HTParentAnchor *anchor, 477 HTChunk *chunk, 478 HTStream *sink); 479 #endif 480 481 #ifdef USE_ZLIB 482 /* 483 HTParseGzFile: Parse a gzip'ed File through a file pointer 484 485 This routine is called by protocols modules to load an object. uses 486 HTStreamStack and HTGzFileCopy. Returns HT_LOADED if successful, can also 487 return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 488 */ 489 extern int HTParseGzFile(HTFormat format_in, 490 HTFormat format_out, 491 HTParentAnchor *anchor, 492 gzFile gzfp, 493 HTStream *sink); 494 495 /* 496 HTParseZzFile: Parse a deflate'd File through a file pointer 497 498 This routine is called by protocols modules to load an object. uses 499 HTStreamStack and HTZzFileCopy. Returns HT_LOADED if successful, can also 500 return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 501 */ 502 extern int HTParseZzFile(HTFormat format_in, 503 HTFormat format_out, 504 HTParentAnchor *anchor, 505 FILE *zzfp, 506 HTStream *sink); 507 508 #endif /* USE_ZLIB */ 509 510 #ifdef USE_BZLIB 511 /* 512 HTParseBzFile: Parse a bzip2'ed File through a file pointer 513 514 This routine is called by protocols modules to load an object. uses 515 HTStreamStack and HTGzFileCopy. Returns HT_LOADED if successful, can also 516 return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 517 */ 518 extern int HTParseBzFile(HTFormat format_in, 519 HTFormat format_out, 520 HTParentAnchor *anchor, 521 BZFILE * bzfp, 522 HTStream *sink); 523 524 #endif /* USE_BZLIB */ 525 526 /* 527 528 HTNetToText: Convert Net ASCII to local representation 529 530 This is a filter stream suitable for taking text from a socket and passing 531 it into a stream which expects text in the local C representation. It does 532 ASCII and newline conversion. As usual, pass its output stream to it when 533 creating it. 534 535 */ 536 extern HTStream *HTNetToText(HTStream *sink); 537 538 /* 539 540 HTFormatInit: Set up default presentations and conversions 541 542 These are defined in HTInit.c or HTSInit.c if these have been replaced. If 543 you don't call this routine, and you don't define any presentations, then 544 this routine will automatically be called the first time a conversion is 545 needed. However, if you explicitly add some conversions (eg using 546 HTLoadRules) then you may want also to explicitly call this to get the 547 defaults as well. 548 549 */ 550 extern void HTFormatInit(void); 551 552 /* 553 554 Epilogue 555 556 */ 557 extern BOOL HTOutputSource; /* Flag: shortcut parser */ 558 559 #ifdef __cplusplus 560 } 561 #endif 562 #endif /* HTFORMAT_H */ 563