1 /* ----------------------------------------------------------------------
2      MIME Parser -- single pass MIME parser
3 
4      Laurence Lundblade <lgl@qualcomm.com>
5 
6      Copyright 2000 QUALCOMM Incorporated.  All rights reserved.
7 
8      File: mime.h
9      Version: 0.2.7   April, 2000
10      Last Edited: Apr 26, 2000
11 
12   ---- */
13 
14 
15 #ifndef _MIMEINCLUDED
16 #define _MIMEINCLUDED
17 
18 #include "config.h"
19 #include <stdio.h>
20 /* ======================================================================
21      C O N F I G    S T U F F
22    ====================================================================== */
/* ----------------------------------------------------------------------
   Deepest multipart nesting handled. Parts deeper are ignored and an
   error code is given. Increasing this increases stored state by about
   100 bytes/level, depending on other constants below.
   --- */
#define kMaxMIMENesting (5)


/* ----------------------------------------------------------------------
   Set by RFC 2046. Don't change this.
   --- */
#define kMIMEBoundaryLen (76)


/* ----------------------------------------------------------------------
   We choose largest token (e.g. parameter value) we handle as the
   boundary parameter. Could be larger if we want.
   --- */
#define kMIMETokenLen  kMIMEBoundaryLen


/* ----------------------------------------------------------------------
   Size of our internal input buffering. This must be at least
   kMIMEBoundaryLen plus the end of line length plus one. It can be much
   larger if you can afford it, with some small gain in efficiency by
   way of fewer callbacks.
   --- */
#define kInBufSize  (100)


/* ----------------------------------------------------------------------
   Compile-time sanity checks on the constants above, so that a bad edit
   fails the build instead of silently overflowing a narrow field.

   The end of line length is taken as 2 (CRLF); this is assumed to be the
   longest line ending the parser has to handle - confirm against the
   parser if that ever changes.
   --- */
#if (kInBufSize) < ((kMIMEBoundaryLen) + 2 + 1)
#error "kInBufSize must be at least kMIMEBoundaryLen + end of line length + 1"
#endif

/* The fill level of the input buffer is kept in a short (bufLen) */
#if (kInBufSize) > 32767
#error "kInBufSize does not fit in the short used for the buffer length"
#endif

/* The buffered token length is kept in an unsigned char (tokenLen) */
#if (kMIMETokenLen) > 255
#error "kMIMETokenLen does not fit in the unsigned char used for token length"
#endif

/* nestLevel is an unsigned char and currentBoundary a signed char; the
   latter presumably indexes the stored boundaries, so use its range. */
#if (kMaxMIMENesting) < 1 || (kMaxMIMENesting) > 127
#error "kMaxMIMENesting must be between 1 and 127"
#endif
51 
52 
53 
54 
55 /* ======================================================================
56      P U B L I C   I N T E R F A C E, Part 1
57    ====================================================================== */
58 
59 /* ----------------------------------------------------------------------
60     Structures to store MIME type info
61 
62     This structure is such that we can nil it out by clearing everything
63     to zero, which makes our code smaller and faster.
64 
65     The ordering of most of the enums here is related to the order
66     of string constants in the parsing code (don't change them)
67 
68     We ignore some MIME headers such as content-md5, and content description
69 
    Parameters are handled a bit oddly to drastically reduce stored state.
71     We only are interested in a small number of them (e.g, boundary
72     charset, Filename), and only handle a limited number per mime type.
73     This is possible because most MIME types have less than some number
74     (e.g. 2) parameters that we are interested in.
75 
76     The parameters from the content-disposition header are merged with
77     the content-type parameters.
78 
79     Parameter values, which are potentially arbitrarily long, are truncated
80     at kMIMETokenLen.
81 
82     Parameter values can be looked up quickly by indexing by MimeParamKind
83     into paramIndex to get the index of the value in the param array.
84     (This makes code to find a parameter of interest in the list very small)
85    ---- */
/* Top-level MIME media type.  The numeric values are spelled out because
   the order is tied to string constants in the parsing code - do not
   renumber or reorder. */
typedef enum {
  majorTypeNone    = 0,
  majorTypeUnknown = 1,
  majorTypeAudio   = 2,
  majorTypeImage   = 3,
  majorTypeVideo   = 4,
  majorTypeModel   = 5,
  majorTypeMulti   = 6,
  majorTypeApp     = 7,
  majorTypeMsg     = 8,
  majorTypeText    = 9
} MajorTypeKind;
90 
/* Content-transfer-encoding of a part.  Values are explicit because the
   order is tied to string constants in the parsing code. */
typedef enum {
  cte7bit   = 0,
  cteBinary = 1,
  cte8bit   = 2,
  cteQP     = 3,
  cteBase64 = 4,
  cteBadCTE = 5
} CteKind;
93 
/* The parameters we care about.  Used as an index into paramIndex, so
   paramMax must stay last: it is the number of kinds, not a kind.
   Values are explicit because the order is tied to string constants in
   the parsing code. */
typedef enum {
  paramNone     = 0,
  paramBoundary = 1,
  paramCharset  = 2,
  paramType     = 3,
  paramFilename = 4,
  paramTypes    = 5,
  paramFormat   = 6,
  paramExtType  = 7,
  paramExtSize  = 8,
  paramExtSite  = 9,
  paramExtName  = 10,
  paramExtDir   = 11,
  paramExtMode  = 12,
  paramMax      = 13
} MimeParamKind;
98 
99 
100 #define kParamMaxNum        (2) /* Total number of instances */
101 #define kParamIndexUnused (255) /* don't change this */
102 
103 
104 typedef struct {
105   MajorTypeKind   majorType;
106   char            minorType[kMIMETokenLen];
107   enum {dispNone=0, dispInline, dispAttach}
108                   contentDisp;
109   CteKind         cte;
110   struct {
111     MimeParamKind name;
112     char          value[kMIMETokenLen];
113   }               param[kParamMaxNum];
114   unsigned char   paramIndex[paramMax];
115   unsigned char   nextParamPos; /* Private - used during parsing only! */
116 } MimeTypeType, *MimeTypePtr;
117 
118 
119 /* ----------------------------------------------------------------------
120    This structure describes the nesting of multi part types so a
121    MIME type handler can know where it is to make type handling decisions
122    (e.g., multipart/alternative handling).  The nestLevel element is the
123    current nesting level. It is 0 in the outside 822 body, 1 for the
124    elements of the first multipart, etc. It can be used to index into
125    attribs to get to attributes of the current nesting level(attribs[0]
126    corresponds to nest level 1, since there are no attributes for level 0).
127    (Be sure nestLevel > 0 before you go attribs[nestLevel-1].) The
128    attributes include the type of the multipart, and the ordinal number
129    of the part being processed. The multipart types are again limited
130    to a select few to save space.
131   --- */
/* The handful of multipart subtypes that get special treatment */
typedef enum {
  multNuthinSpecial = 0,
  multAlt           = 1,
  multAppleII       = 2,
  multReport        = 3,
  multRelated       = 4
} MultTypeKind;
134 
135 typedef struct {
136   unsigned char nestLevel;
137   struct {
138     unsigned char partNum;
139     MultTypeKind  mult;
140     signed char   LookAHead;     /* Look a head for all text parts in
141 				    alternative  */
142     unsigned long offset;        /* Preserve the offset in file when
143 				    look a head starts */
144     unsigned long lines;         /* lines to go in message since lookahead
145 				    started. */
146   } attribs[kMaxMIMENesting];
147 } MultAttribsType, *MultAttribsPtr;
148 
149 
150 
151 
/* ----------------------------------------------------------------------
   OutputFn - handler for the content of a particular MIME type.

   The user of this MIME parser defines handlers for the various types
   and passes them to the parser, which then calls them back on buffers
   of data from the entity.

   The transfer encoding has been removed before the data gets here.
   The data may be binary and is not guaranteed to be NUL terminated.
   The handler may modify the buffer it is given, but of course can't
   assume it is any bigger than len.

   The last call for a part has buf set to NULL to indicate that
   handling of the part is complete.

   Sample handlers are the text/enriched and text/html strippers.

   Args: oFnState - state/context for the handler
         buf      - decoded data, or NULL on the last call for a part
         len      - size of buf
  --- */
typedef void OutputFn(
  void *oFnState,
  char *buf,
  long  len);
169 
170 
/* ----------------------------------------------------------------------
   Rfc822Fn - call back supplied by the user for handling non-MIME
   headers.  It is called with the contents of each header.

   The blank line that separates the header from the body IS passed.  It
   signals that all headers for a MIME entity have been passed, and that
   there is no continuation for the last header.

   Args: state          - the pointer the caller initializes in the
                          MimeInit call
         header         - contents of the header
         len            - length of header
         isContinuation - 1 if the buffer is a continuation of the
                          previous header
   ---*/
typedef void Rfc822Fn(void *state, char *header, long len,
                      unsigned char isContinuation);
187 
188 
189 /* ----------------------------------------------------------------------
190    Another call back supplied by the user. This one is called for each
191    MIME entity the parser encounters. The function is given the mime
192    type and multipart attributes. It must supply an output function
193    and a context/state to be passed to the output function.
194 
195    The very first call to this function has a NULL mType and multAttribs,
196    and is to get the headerFn for very top level headers. At that point no
197    MIME type is known. The outFn pointer will be NULL too, so don't
198    try and pass that back.
199 
200    This is where all the real handling happens.
201 
   Args:  state            - The typeMapperState passed to MimeInit
          mError           - An error code for current part
          mType            - Mime type of current part
          multAttribs      - Describes nesting
          outFnConstructor - Place to return the content handler's
                             constructor function
          outFn            - Place to return content handling function
          outFnDestructor  - Place to return the content handler's
                             destructor function
          outFnState       - State for content handling function
          headerFn         - Header outputting function
          headerFnState    - Header outputting state
210    --- */
/* Error code handed to the type mapper for the current part */
typedef enum {
  merrorNone       = 0,
  merrorTooDeep    = 1,
  merrorBadCTE     = 2,
  merrorNoBoundary = 3
} MimeErrorKind;
213 
214 typedef void MimeTypeFn(
215   void                 *state,
216   const MimeErrorKind   mError,
217   const MimeTypePtr     mType,
218   const MultAttribsPtr  multAttribs,
219   OutputFn            **outFnConstructor,
220   OutputFn            **outFn,
221   OutputFn            **outFnDestructor,
222   void                **outFnState,
223   Rfc822Fn            **headerFn,
224   void                **headerFnState);
225 
226 
227 /* ================= E N D   P U B L I C   S T U F F  ================ */
228 
229 
230 
231 /* ======================================================================
232     P R I V A T E   S T U F F
233    ====================================================================== */
234 /* ----------------------------------------------------------------------
235    Keep track of state of rfc822 lexer.  Can initialize this structure
236    by zeroing all bytes.
237    ---- */
/* RFC 822 lexer state; all zero bytes is a valid initial state */
typedef struct {
  /* Level of parenthesis nesting */
  unsigned char parenNesting;
  /* Are we in a quoted part? */
  unsigned char inQuotes;
  /* Don't make lower case when copying tokens */
  unsigned char dontDownCase;
  /* Not looking for space */
  unsigned char gettingToken;
  /* Length of token we've buffered up */
  unsigned char tokenLen;
} Rfc822LexType;
245 
246 
247 /* ----------------------------------------------------------------------
248    Keep track of MIME header parser state
249    ---- */
250 typedef struct {
251   Rfc822LexType      lexState;
252   enum {wantMajor, wantSlash, wantMinor, wantSemi, wantParamName,
253         wantEqual, wantParamValue, wantDisp, wantDesc, wantCTE,
254         wantNothing} parseState;
255   MimeParamKind      paramWanted;
256   char               tokenBuf[kMIMETokenLen+1];
257 } HeaderParseType;
258 
259 
260 /* ----------------------------------------------------------------------
261    One structure to hold the state for all the different content-transfer
262    decoders
263    ---- */
264 typedef struct
265 {
266   OutputFn *outFn;
267   OutputFn *outFnCtor;
268   OutputFn *outFnDtor;
269   void     *outFnState;
270   union {
271   	struct {
272   		char  lastWasEqual;
273   		char  is2047;
274   	} QP;
275   	struct {
276   		unsigned long threeBytes;
277   		signed char   shift;
278                 char          pad;
279   	} B64;
280   } x;
281 } MimeCteType, *MimeCtePtr;
282 
283 typedef void MimeCteFn(MimeCtePtr state, char *buffer, long bufferLen);
284 
285 
286 /* ----------------------------------------------------------------------
287    The main structure for storing all our junk
288    --- */
289 typedef struct {
290   /* ---- Track boundaries and major MIME parser state ---- */
291   signed char     currentBoundary;
292   char            mimeBoundaries[kMaxMIMENesting][kMIMEBoundaryLen];
293   enum {parseHeaders=0, parseBodyPart, parsePartStart, parseMimeHeaders}
294                   msgPartState; /* State for multipart and message types */
295 
296   /* ---- General header parser state ---- */
297   enum {wantedHeader=0, mimeHeader, unwantedHeader, rfc822Header}
298                   headerDisposition;
299   unsigned char   isMIME;
300   unsigned char   lastHadNoEOL;
301   char            gotTopHeaderHandler;
302 
303   /* ---- MIME type of current part ---- */
304   MimeTypeType    mType;
305 
306   /* ---- Track the multipart nesting that got us to where we are ---- */
307   MultAttribsType multAttribs;
308 
309   /* ---- MIME header (Content-*) parser state ---- */
310   HeaderParseType mHeaderState;
311 
312   /* ---- Stuff to handle CTE ---- */
313   /* ---- The CTE handler state includes specific data handler ---- */
314   enum {outputCTE, outputNone, outputDirect} outputTag;
315   OutputFn       *cteHandlerConstructor;
316   OutputFn       *cteHandlerDestructor;
317   union {
318     OutputFn       *outHandler;
319     MimeCteFn      *cteHandler;
320   } output;
321   union {
322     MimeCteType     cteState; /* When we actually do CTE */
323     void           *oFnState; /* When we go direct to output handler */
324   } outState;
325 
326   /* --- The mime type mapper call back --- */
327   MimeTypeFn     *mapperFun;
328   void           *mapperFunState;
329   Rfc822Fn       *headerHandler;
330   void           *headerHandlerState;
331 
332   /* --- handle fact that trailing newline may be part of boundary --- */
333   char            savedNewline;
334 
335   /* --- input blocking to make boundary detection easier --- */
336   char            buf[kInBufSize];/*Buffer input once for boundary checking*/
337   short           bufLen;
338   FILE           *mdrop;
339 } MimeParseType, *MimeParsePtr;
340 
341 /* ================= E N D   P R I V A T E   S T U F F  ================ */
342 
343 
344 
345 
346 /* ======================================================================
347      P U B L I C   I N T E R F A C E, Part 2
348    ====================================================================== */
349 
350 
351 
352 /* ----------------------------------------------------------------------
353    Initialize the MIME mangler - call this at start of every message
354 
355    Args: typeMapper - A function to be called for each MIME type (see above)
356          typeMapperState - a pointer/context passed to each mapper call
357 
358    Returns: pointer to Mime parser state/context or NULL
359    ---- */
360 
361 MimeParsePtr MimeInit __PROTO((
362   MimeTypeFn    *typeMapper,
363   void          *typeMapperState,
364   FILE          *mbox));
365 
366 
367 
368 /* ----------------------------------------------------------------------
369    Done with MIME parser - clean up
370 
371    Args: s - State/context
372 
373    Bug: This ought to flush any buffered input
374    ---- */
375 void MimeFinish __PROTO((MimeParsePtr s));
376 
377 
378 
379 /*----------------------------------------------------------------------
380   Actually do the MIME parser
381 
382   Args: state - State/context returned from the MimeInit call
383         inBuf - a buffer of input to process
384         inBufLen - length of the input buffer
385 
386   Returns: nothing
387 
388   The input buffers may be any size from byte at a type to megabyte
389   buffers at a time. The input may also be binary MIME.
390 
391   Calls here result in calls to the typeMapper function, which results
392   in calls to the output function supplied by the type mapper.
393   ---- */
394 void MimeInput __PROTO((
395   MimeParsePtr  state,
396   const char   *inBuf,
397   unsigned long inBufLen));
398 
399 
400 #endif /* _MIMEINCLUDED */
401 
402 
403 
404