1 /* ---------------------------------------------------------------------- 2 MIME Parser -- single pass MIME parser 3 4 Laurence Lundblade <lgl@qualcomm.com> 5 6 Copyright 2000 QUALCOMM Incorporated. All rights reserved. 7 8 File: mime.h 9 Version: 0.2.7 April, 2000 10 Last Edited: Apr 26, 2000 11 12 ---- */ 13 14 15 #ifndef _MIMEINCLUDED 16 #define _MIMEINCLUDED 17 18 #include "config.h" 19 #include <stdio.h> 20 /* ====================================================================== 21 C O N F I G S T U F F 22 ====================================================================== */ 23 /* ---------------------------------------------------------------------- 24 Deepest multipart nesting handled. Parts deeper are ignored and an 25 error code is given. Increasing this increases stored state by about 26 100 bytes/level, depending on other constants below. 27 --- */ 28 #define kMaxMIMENesting (5) 29 30 31 /* ---------------------------------------------------------------------- 32 Set by RFC 2046. Don't change this. 33 --- */ 34 #define kMIMEBoundaryLen (76) 35 36 37 /* ---------------------------------------------------------------------- 38 We choose largest token (e.g. parameter value) we handle as the 39 boundary parameter. Could be larger if we want. 40 --- */ 41 #define kMIMETokenLen kMIMEBoundaryLen 42 43 44 /* ---------------------------------------------------------------------- 45 Size of our internal input buffering. This must be at least 46 kMIMEBoundaryLen plus the end of line length plus one. It can be much 47 larger if you can afford it, with some small gain in efficiency by 48 way of fewer callbacks. 49 --- */ 50 #define kInBufSize (100) 51 52 53 54 55 /* ====================================================================== 56 P U B L I C I N T E R F A C E, Part 1 57 ====================================================================== */ 58 59 /* ---------------------------------------------------------------------- 60 Structures to store MIME type info 61 62 This structure is such that we can nil it out by clearing everything 63 to zero, which makes our code smaller and faster. 64 65 The ordering of most of the enums here is related to the order 66 of string constants in the parsing code (don't change them) 67 68 We ignore some MIME headers such as content-md5, and content description 69 70 Parameters are handled a it oddly to drastically reduce stored state. 71 We only are interested in a small number of them (e.g, boundary 72 charset, Filename), and only handle a limited number per mime type. 73 This is possible because most MIME types have less than some number 74 (e.g. 2) parameters that we are interested in. 75 76 The parameters from the content-disposition header are merged with 77 the content-type parameters. 78 79 Parameter values, which are potentially arbitrarily long, are truncated 80 at kMIMETokenLen. 81 82 Parameter values can be looked up quickly by indexing by MimeParamKind 83 into paramIndex to get the index of the value in the param array. 84 (This makes code to find a parameter of interest in the list very small) 85 ---- */ 86 typedef enum {majorTypeNone=0, majorTypeUnknown, majorTypeAudio, 87 majorTypeImage, majorTypeVideo, majorTypeModel, majorTypeMulti, 88 majorTypeApp, majorTypeMsg, majorTypeText} 89 MajorTypeKind; 90 91 typedef enum {cte7bit=0, cteBinary, cte8bit, 92 cteQP, cteBase64, cteBadCTE} CteKind; 93 94 typedef enum {paramNone=0, paramBoundary, paramCharset, paramType, 95 paramFilename, paramTypes, paramFormat, paramExtType, 96 paramExtSize, paramExtSite, paramExtName, paramExtDir, 97 paramExtMode, paramMax} MimeParamKind; 98 99 100 #define kParamMaxNum (2) /* Total number of instances */ 101 #define kParamIndexUnused (255) /* don't change this */ 102 103 104 typedef struct { 105 MajorTypeKind majorType; 106 char minorType[kMIMETokenLen]; 107 enum {dispNone=0, dispInline, dispAttach} 108 contentDisp; 109 CteKind cte; 110 struct { 111 MimeParamKind name; 112 char value[kMIMETokenLen]; 113 } param[kParamMaxNum]; 114 unsigned char paramIndex[paramMax]; 115 unsigned char nextParamPos; /* Private - used during parsing only! */ 116 } MimeTypeType, *MimeTypePtr; 117 118 119 /* ---------------------------------------------------------------------- 120 This structure describes the nesting of multi part types so a 121 MIME type handler can know where it is to make type handling decisions 122 (e.g., multipart/alternative handling). The nestLevel element is the 123 current nesting level. It is 0 in the outside 822 body, 1 for the 124 elements of the first multipart, etc. It can be used to index into 125 attribs to get to attributes of the current nesting level(attribs[0] 126 corresponds to nest level 1, since there are no attributes for level 0). 127 (Be sure nestLevel > 0 before you go attribs[nestLevel-1].) The 128 attributes include the type of the multipart, and the ordinal number 129 of the part being processed. The multipart types are again limited 130 to a select few to save space. 131 --- */ 132 typedef enum {multNuthinSpecial=0, multAlt, multAppleII, multReport, 133 multRelated} MultTypeKind; 134 135 typedef struct { 136 unsigned char nestLevel; 137 struct { 138 unsigned char partNum; 139 MultTypeKind mult; 140 signed char LookAHead; /* Look a head for all text parts in 141 alternative */ 142 unsigned long offset; /* Preserve the offset in file when 143 look a head starts */ 144 unsigned long lines; /* lines to go in message since lookahead 145 started. */ 146 } attribs[kMaxMIMENesting]; 147 } MultAttribsType, *MultAttribsPtr; 148 149 150 151 152 /* ---------------------------------------------------------------------- 153 This is a prototype for a functions supplied to handle a particular 154 MIME type. The user of this MIME parser defines functions for handling 155 various types and pass the function to the MIME parser. The MIME parser 156 then calls back the functions on buffers of data from the entity. 157 158 The transfer encoding is removed before being passed to this call back. 159 The data passed to this call back may be binary, and is not guaranteed 160 to be NULL terminated. The function may modify the buffer passed to 161 it. It of course can't assume its bigger then len. 162 163 Sample handlers are the text/enriched and text/html strippers. 164 165 The last call to this function for a part has buf set to NULL to indicate 166 handling of the part is complete. 167 --- */ 168 typedef void OutputFn(void *oFnState, char *buf, long len); 169 170 171 /* ---------------------------------------------------------------------- 172 This is the call back supplied by the user to the MIME parser for 173 handling non-MIME headers. The function is called with the contents 174 of each header. The isContination parameter will be 1 if the buffer 175 is a continuation of the previous header. The state parameter 176 is a pointer the caller initializes in the MimeInit call. 177 178 The blank line that separates the header from the body IS passed, and 179 signals that all headers for a MIME entity have been passed, and that 180 there is no continuation for the last header. 181 182 ---*/ 183 typedef void Rfc822Fn(void *state, 184 char *header, 185 long len, 186 unsigned char isContinuation); 187 188 189 /* ---------------------------------------------------------------------- 190 Another call back supplied by the user. This one is called for each 191 MIME entity the parser encounters. The function is given the mime 192 type and multipart attributes. It must supply an output function 193 and a context/state to be passed to the output function. 194 195 The very first call to this function has a NULL mType and multAttribs, 196 and is to get the headerFn for very top level headers. At that point no 197 MIME type is known. The outFn pointer will be NULL too, so don't 198 try and pass that back. 199 200 This is where all the real handling happens. 201 202 Args: typeMapperState - The state passed to MimeInit 203 mError - An error code for current part 204 mType - Mime type of current part 205 multAttribs - Describes nesting 206 outFn - Place to return content handling function 207 outFnState - State for content handling function 208 headerFn - Header outputing function 209 headerFnState - Header outputing state 210 --- */ 211 typedef enum {merrorNone, merrorTooDeep, merrorBadCTE, 212 merrorNoBoundary} MimeErrorKind; 213 214 typedef void MimeTypeFn( 215 void *state, 216 const MimeErrorKind mError, 217 const MimeTypePtr mType, 218 const MultAttribsPtr multAttribs, 219 OutputFn **outFnConstructor, 220 OutputFn **outFn, 221 OutputFn **outFnDestructor, 222 void **outFnState, 223 Rfc822Fn **headerFn, 224 void **headerFnState); 225 226 227 /* ================= E N D P U B L I C S T U F F ================ */ 228 229 230 231 /* ====================================================================== 232 P R I V A T E S T U F F 233 ====================================================================== */ 234 /* ---------------------------------------------------------------------- 235 Keep track of state of rfc822 lexer. Can initialize this structure 236 by zeroing all bytes. 237 ---- */ 238 typedef struct { 239 unsigned char parenNesting; /* Level of parenthesis nesting */ 240 unsigned char inQuotes; /* Are we in a quoted part? */ 241 unsigned char dontDownCase; /* Don't make lower when copying tokens */ 242 unsigned char gettingToken; /* Not looking for space */ 243 unsigned char tokenLen; /* Length of token we've buffered up */ 244 } Rfc822LexType; 245 246 247 /* ---------------------------------------------------------------------- 248 Keep track of MIME header parser state 249 ---- */ 250 typedef struct { 251 Rfc822LexType lexState; 252 enum {wantMajor, wantSlash, wantMinor, wantSemi, wantParamName, 253 wantEqual, wantParamValue, wantDisp, wantDesc, wantCTE, 254 wantNothing} parseState; 255 MimeParamKind paramWanted; 256 char tokenBuf[kMIMETokenLen+1]; 257 } HeaderParseType; 258 259 260 /* ---------------------------------------------------------------------- 261 One structure to hold the state for all the different content-transfer 262 decoders 263 ---- */ 264 typedef struct 265 { 266 OutputFn *outFn; 267 OutputFn *outFnCtor; 268 OutputFn *outFnDtor; 269 void *outFnState; 270 union { 271 struct { 272 char lastWasEqual; 273 char is2047; 274 } QP; 275 struct { 276 unsigned long threeBytes; 277 signed char shift; 278 char pad; 279 } B64; 280 } x; 281 } MimeCteType, *MimeCtePtr; 282 283 typedef void MimeCteFn(MimeCtePtr state, char *buffer, long bufferLen); 284 285 286 /* ---------------------------------------------------------------------- 287 The main structure for storing all our junk 288 --- */ 289 typedef struct { 290 /* ---- Track boundaries and major MIME parser state ---- */ 291 signed char currentBoundary; 292 char mimeBoundaries[kMaxMIMENesting][kMIMEBoundaryLen]; 293 enum {parseHeaders=0, parseBodyPart, parsePartStart, parseMimeHeaders} 294 msgPartState; /* State for multipart and message types */ 295 296 /* ---- General header parser state ---- */ 297 enum {wantedHeader=0, mimeHeader, unwantedHeader, rfc822Header} 298 headerDisposition; 299 unsigned char isMIME; 300 unsigned char lastHadNoEOL; 301 char gotTopHeaderHandler; 302 303 /* ---- MIME type of current part ---- */ 304 MimeTypeType mType; 305 306 /* ---- Track the multipart nesting that got us to where we are ---- */ 307 MultAttribsType multAttribs; 308 309 /* ---- MIME header (Content-*) parser state ---- */ 310 HeaderParseType mHeaderState; 311 312 /* ---- Stuff to handle CTE ---- */ 313 /* ---- The CTE handler state includes specific data handler ---- */ 314 enum {outputCTE, outputNone, outputDirect} outputTag; 315 OutputFn *cteHandlerConstructor; 316 OutputFn *cteHandlerDestructor; 317 union { 318 OutputFn *outHandler; 319 MimeCteFn *cteHandler; 320 } output; 321 union { 322 MimeCteType cteState; /* When we actually do CTE */ 323 void *oFnState; /* When we go direct to output handler */ 324 } outState; 325 326 /* --- The mime type mapper call back --- */ 327 MimeTypeFn *mapperFun; 328 void *mapperFunState; 329 Rfc822Fn *headerHandler; 330 void *headerHandlerState; 331 332 /* --- handle fact that trailing newline may be part of boundary --- */ 333 char savedNewline; 334 335 /* --- input blocking to make boundary detection easier --- */ 336 char buf[kInBufSize];/*Buffer input once for boundary checking*/ 337 short bufLen; 338 FILE *mdrop; 339 } MimeParseType, *MimeParsePtr; 340 341 /* ================= E N D P R I V A T E S T U F F ================ */ 342 343 344 345 346 /* ====================================================================== 347 P U B L I C I N T E R F A C E, Part 2 348 ====================================================================== */ 349 350 351 352 /* ---------------------------------------------------------------------- 353 Initialize the MIME mangler - call this at start of every message 354 355 Args: typeMapper - A function to be called for each MIME type (see above) 356 typeMapperState - a pointer/context passed to each mapper call 357 358 Returns: pointer to Mime parser state/context or NULL 359 ---- */ 360 361 MimeParsePtr MimeInit __PROTO(( 362 MimeTypeFn *typeMapper, 363 void *typeMapperState, 364 FILE *mbox)); 365 366 367 368 /* ---------------------------------------------------------------------- 369 Done with MIME parser - clean up 370 371 Args: s - State/context 372 373 Bug: This ought to flush any buffered input 374 ---- */ 375 void MimeFinish __PROTO((MimeParsePtr s)); 376 377 378 379 /*---------------------------------------------------------------------- 380 Actually do the MIME parser 381 382 Args: state - State/context returned from the MimeInit call 383 inBuf - a buffer of input to process 384 inBufLen - length of the input buffer 385 386 Returns: nothing 387 388 The input buffers may be any size from byte at a type to megabyte 389 buffers at a time. The input may also be binary MIME. 390 391 Calls here result in calls to the typeMapper function, which results 392 in calls to the output function supplied by the type mapper. 393 ---- */ 394 void MimeInput __PROTO(( 395 MimeParsePtr state, 396 const char *inBuf, 397 unsigned long inBufLen)); 398 399 400 #endif /* _MIMEINCLUDED */ 401 402 403 404