/*
 * COPYRIGHT:   See COPYING in the top level directory
 * PROJECT:     ReactOS HTTP Daemon
 * FILE:        http.cpp
 * PURPOSE:     HTTP 1.1 parser engine
 * PROGRAMMERS: Casper S. Hornstrup (chorns@users.sourceforge.net)
 * REVISIONS:
 *   CSH  01/09/2000 Created
 * TODO:        - Implement message-body
 *              - Implement more general-header entries
 *              - Implement more request-header entries
 *              - Implement more entity-header entries
 */
14*c2c66affSColin Finck #include <debug.h>
15*c2c66affSColin Finck #include <iostream>
16*c2c66affSColin Finck #include <string.h>
17*c2c66affSColin Finck #include <http.h>
18*c2c66affSColin Finck 
19*c2c66affSColin Finck CHAR MethodTable[NUMMETHODS][8] = {"OPTIONS", "GET", "HEAD", "POST", "PUT",
20*c2c66affSColin Finck     "DELETE", "TRACE"};
21*c2c66affSColin Finck 
22*c2c66affSColin Finck CHAR GenerelTable[NUMGENERELS][18] = {"Cache-Control", "Connection", "Date", "Pragma",
23*c2c66affSColin Finck     "Transfer-Encoding", "Upgrade", "Via"};
24*c2c66affSColin Finck 
25*c2c66affSColin Finck CHAR RequestTable[NUMREQUESTS][20] = {"Accept", "Accept-Charset", "Accept-Encoding",
26*c2c66affSColin Finck     "Accept-Language", "Authorization", "From", "Host", "If-Modified-Since", "If-Match",
27*c2c66affSColin Finck     "If-None-Match", "If-Range", "If-Unmodified-Since", "Max-Forwards",
28*c2c66affSColin Finck     "Proxy-Authorization", "Range", "Referer", "User-Agent"};
29*c2c66affSColin Finck 
30*c2c66affSColin Finck CHAR EntityTable[NUMENTITIES][17] = {"Allow", "Content-Base", "Content-Encoding",
31*c2c66affSColin Finck     "Content-Language", "Content-Length", "Content-Location", "Content-MD5",
32*c2c66affSColin Finck     "Content-Range", "Content-Type", "ETag", "Expires", "Last-Modified"};
33*c2c66affSColin Finck 
34*c2c66affSColin Finck // *************************** CHttpParser ***************************
35*c2c66affSColin Finck 
36*c2c66affSColin Finck // Default constructor
CHttpParser()37*c2c66affSColin Finck CHttpParser::CHttpParser()
38*c2c66affSColin Finck {
39*c2c66affSColin Finck     nHead = 0;
40*c2c66affSColin Finck     nTail = 0;
41*c2c66affSColin Finck }
42*c2c66affSColin Finck 
43*c2c66affSColin Finck // Default destructor
~CHttpParser()44*c2c66affSColin Finck CHttpParser::~CHttpParser()
45*c2c66affSColin Finck {
46*c2c66affSColin Finck }
47*c2c66affSColin Finck 
48*c2c66affSColin Finck // Returns TRUE if a complete HTTP message is in buffer
Complete()49*c2c66affSColin Finck BOOL CHttpParser::Complete()
50*c2c66affSColin Finck {
51*c2c66affSColin Finck     UINT nTmp;
52*c2c66affSColin Finck 
53*c2c66affSColin Finck     /*DPRINT("--1:-%d---\n", sBuffer[nHead-2]);
54*c2c66affSColin Finck     DPRINT("--2:-%d---\n", sBuffer[nHead-1]);
55*c2c66affSColin Finck 
56*c2c66affSColin Finck     sBuffer[nHead] = '!';
57*c2c66affSColin Finck     sBuffer[nHead+1] = 0;
58*c2c66affSColin Finck     DPRINT("Examining buffer: (Head: %d, Tail: %d)\n", nHead, nTail);
59*c2c66affSColin Finck     DPRINT("%s\n", (LPSTR)&sBuffer[nTail]);*/
60*c2c66affSColin Finck 
61*c2c66affSColin Finck     nTmp = nTail;
62*c2c66affSColin Finck     if (!Parse()) {
63*c2c66affSColin Finck         if (!bUnknownMethod)
64*c2c66affSColin Finck             nTail = nTmp;
65*c2c66affSColin Finck         return FALSE;
66*c2c66affSColin Finck     } else
67*c2c66affSColin Finck         return TRUE;
68*c2c66affSColin Finck }
69*c2c66affSColin Finck 
70*c2c66affSColin Finck 
71*c2c66affSColin Finck // Read a character from buffer
ReadChar(LPSTR lpsStr)72*c2c66affSColin Finck BOOL CHttpParser::ReadChar(LPSTR lpsStr)
73*c2c66affSColin Finck {
74*c2c66affSColin Finck     if (nTail <= nHead) {
75*c2c66affSColin Finck         if (nTail != nHead) {
76*c2c66affSColin Finck             lpsStr[0] = sBuffer[nTail];
77*c2c66affSColin Finck             nTail++;
78*c2c66affSColin Finck             return TRUE;
79*c2c66affSColin Finck         } else {
80*c2c66affSColin Finck             lpsStr[0] = 0;
81*c2c66affSColin Finck             return FALSE;
82*c2c66affSColin Finck         }
83*c2c66affSColin Finck     } else {
84*c2c66affSColin Finck         if (nTail == sizeof(sBuffer))
85*c2c66affSColin Finck             nTail = 0;
86*c2c66affSColin Finck         if (nTail != nHead) {
87*c2c66affSColin Finck             lpsStr[0] = sBuffer[nTail];
88*c2c66affSColin Finck             nTail++;
89*c2c66affSColin Finck             return TRUE;
90*c2c66affSColin Finck         } else {
91*c2c66affSColin Finck             lpsStr[0] = 0;
92*c2c66affSColin Finck             return FALSE;
93*c2c66affSColin Finck         }
94*c2c66affSColin Finck     }
95*c2c66affSColin Finck }
96*c2c66affSColin Finck 
97*c2c66affSColin Finck // Peek at a character in the buffer
PeekChar(LPSTR lpsStr)98*c2c66affSColin Finck BOOL CHttpParser::PeekChar(LPSTR lpsStr)
99*c2c66affSColin Finck {
100*c2c66affSColin Finck     UINT nFakeTail;
101*c2c66affSColin Finck 
102*c2c66affSColin Finck     if (nTail == sizeof(sBuffer))
103*c2c66affSColin Finck         nFakeTail = 0;
104*c2c66affSColin Finck     else
105*c2c66affSColin Finck         nFakeTail = nTail;
106*c2c66affSColin Finck     if (nFakeTail != nHead) {
107*c2c66affSColin Finck         lpsStr[0] = sBuffer[nFakeTail];
108*c2c66affSColin Finck         return TRUE;
109*c2c66affSColin Finck     } else {
110*c2c66affSColin Finck         lpsStr[0] = 0;
111*c2c66affSColin Finck         return FALSE;
112*c2c66affSColin Finck     }
113*c2c66affSColin Finck }
114*c2c66affSColin Finck 
115*c2c66affSColin Finck // Read a string from buffer. Only A-Z, a-z, 0-9 and '-' are valid characters
ReadString(LPSTR lpsStr,UINT nLength)116*c2c66affSColin Finck BOOL CHttpParser::ReadString(LPSTR lpsStr, UINT nLength)
117*c2c66affSColin Finck {
118*c2c66affSColin Finck     UINT i = 0;
119*c2c66affSColin Finck     CHAR sTmp;
120*c2c66affSColin Finck 
121*c2c66affSColin Finck     while (PeekChar(&sTmp)) {
122*c2c66affSColin Finck         if (((sTmp >= 'A') && (sTmp <= 'Z')) || ((sTmp >= 'a') && (sTmp <= 'z')) ||
123*c2c66affSColin Finck             ((sTmp >= '0') && (sTmp <= '9')) || (sTmp == '-')) {
124*c2c66affSColin Finck             if (i >= (nLength - 1)) {
125*c2c66affSColin Finck                 lpsStr[0] = 0;
126*c2c66affSColin Finck                 return FALSE;
127*c2c66affSColin Finck             }
128*c2c66affSColin Finck             ReadChar(&sTmp);
129*c2c66affSColin Finck             lpsStr[i] = sTmp;
130*c2c66affSColin Finck             i++;
131*c2c66affSColin Finck         } else {
132*c2c66affSColin Finck             lpsStr[i] = 0;
133*c2c66affSColin Finck             return TRUE;
134*c2c66affSColin Finck         }
135*c2c66affSColin Finck     }
136*c2c66affSColin Finck     lpsStr[0] = 0;
137*c2c66affSColin Finck     return FALSE;
138*c2c66affSColin Finck }
139*c2c66affSColin Finck 
140*c2c66affSColin Finck // Read a string from buffer. Stop if SP or CR is found or when there are no more
141*c2c66affSColin Finck // characters
ReadSpecial(LPSTR lpsStr,UINT nLength)142*c2c66affSColin Finck BOOL CHttpParser::ReadSpecial(LPSTR lpsStr, UINT nLength)
143*c2c66affSColin Finck {
144*c2c66affSColin Finck     UINT i = 0;
145*c2c66affSColin Finck     CHAR sTmp;
146*c2c66affSColin Finck 
147*c2c66affSColin Finck     while (PeekChar(&sTmp) && (sTmp != ' ') && (sTmp != 13)) {
148*c2c66affSColin Finck         if (i >= (nLength - 1)) {
149*c2c66affSColin Finck             lpsStr[nLength - 1] = 0;
150*c2c66affSColin Finck             return FALSE;
151*c2c66affSColin Finck         }
152*c2c66affSColin Finck         ReadChar(&sTmp);
153*c2c66affSColin Finck         lpsStr[i] = sTmp;
154*c2c66affSColin Finck         i++;
155*c2c66affSColin Finck     }
156*c2c66affSColin Finck     lpsStr[i] = 0;
157*c2c66affSColin Finck     return TRUE;
158*c2c66affSColin Finck }
159*c2c66affSColin Finck 
160*c2c66affSColin Finck // Skip until "sCh" is found
Skip(CHAR sCh)161*c2c66affSColin Finck VOID CHttpParser::Skip(CHAR sCh)
162*c2c66affSColin Finck {
163*c2c66affSColin Finck     CHAR sTmp;
164*c2c66affSColin Finck 
165*c2c66affSColin Finck     while (PeekChar(&sTmp) && (sTmp != sCh))
166*c2c66affSColin Finck         ReadChar(&sTmp);
167*c2c66affSColin Finck }
168*c2c66affSColin Finck 
169*c2c66affSColin Finck // Return TRUE if sCh is the next character
Expect(CHAR sCh)170*c2c66affSColin Finck BOOL CHttpParser::Expect(CHAR sCh)
171*c2c66affSColin Finck {
172*c2c66affSColin Finck     CHAR sTmp;
173*c2c66affSColin Finck 
174*c2c66affSColin Finck     if (PeekChar(&sTmp)) {
175*c2c66affSColin Finck         if (sTmp == sCh) {
176*c2c66affSColin Finck             ReadChar(&sTmp);
177*c2c66affSColin Finck             return TRUE;
178*c2c66affSColin Finck         }
179*c2c66affSColin Finck     }
180*c2c66affSColin Finck     return FALSE;
181*c2c66affSColin Finck }
182*c2c66affSColin Finck 
183*c2c66affSColin Finck // Return TRUE if CRLF are the next characters
ExpectCRLF()184*c2c66affSColin Finck BOOL CHttpParser::ExpectCRLF()
185*c2c66affSColin Finck {
186*c2c66affSColin Finck     return (Expect(13) && Expect(10));
187*c2c66affSColin Finck }
188*c2c66affSColin Finck 
189*c2c66affSColin Finck // Request = RequestLine | *( GenerelHeader | RequestHeader | EntityHeader )
190*c2c66affSColin Finck //           CRLF [ MessageBody ]
Parse()191*c2c66affSColin Finck BOOL CHttpParser::Parse()
192*c2c66affSColin Finck {
193*c2c66affSColin Finck     BOOL bStatus;
194*c2c66affSColin Finck 
195*c2c66affSColin Finck     if (RequestLine()) {
196*c2c66affSColin Finck         do {
197*c2c66affSColin Finck             if (!ReadString(sHeader, sizeof(sHeader)))
198*c2c66affSColin Finck                 break;
199*c2c66affSColin Finck             bStatus = (GenerelHeader());
200*c2c66affSColin Finck             bStatus = (RequestHeader() || bStatus);
201*c2c66affSColin Finck             bStatus = (EntityHeader() || bStatus);
202*c2c66affSColin Finck         } while (bStatus);
203*c2c66affSColin Finck         // CRLF
204*c2c66affSColin Finck         if (!ExpectCRLF())
205*c2c66affSColin Finck             return FALSE;
206*c2c66affSColin Finck         MessageBody();
207*c2c66affSColin Finck         return TRUE;
208*c2c66affSColin Finck     }
209*c2c66affSColin Finck     return FALSE;
210*c2c66affSColin Finck }
211*c2c66affSColin Finck 
212*c2c66affSColin Finck // RequestLine = Method SP RequestURI SP HTTP-Version CRLF
RequestLine()213*c2c66affSColin Finck BOOL CHttpParser::RequestLine()
214*c2c66affSColin Finck {
215*c2c66affSColin Finck     CHAR sCh;
216*c2c66affSColin Finck     UINT i;
217*c2c66affSColin Finck 
218*c2c66affSColin Finck     bUnknownMethod = FALSE;
219*c2c66affSColin Finck 
220*c2c66affSColin Finck     // RFC 2068 states that servers SHOULD ignore any empty nine(s) received where a
221*c2c66affSColin Finck     // Request-Line is expected
222*c2c66affSColin Finck     while (PeekChar(&sCh) && ((sCh == 13) || (sCh == 10)));
223*c2c66affSColin Finck 
224*c2c66affSColin Finck     if (!ReadString(sMethod, sizeof(sMethod)))
225*c2c66affSColin Finck         return FALSE;
226*c2c66affSColin Finck 
227*c2c66affSColin Finck     for (i = 0; i < NUMMETHODS; i++) {
228*c2c66affSColin Finck         if (strcmp(MethodTable[i], sMethod) == 0) {
229*c2c66affSColin Finck             nMethodNo = i;
230*c2c66affSColin Finck             if (!Expect(' '))
231*c2c66affSColin Finck                 return FALSE;
232*c2c66affSColin Finck             // URI (ie. host/directory/resource)
233*c2c66affSColin Finck             if (!ReadSpecial(sUri, sizeof(sUri)))
234*c2c66affSColin Finck                 return FALSE;
235*c2c66affSColin Finck             if (!Expect(' '))
236*c2c66affSColin Finck                 return FALSE;
237*c2c66affSColin Finck             // HTTP version (eg. HTTP/1.1)
238*c2c66affSColin Finck             if (!ReadSpecial(sVersion, sizeof(sVersion)))
239*c2c66affSColin Finck                 return FALSE;
240*c2c66affSColin Finck             // CRLF
241*c2c66affSColin Finck             if (!ExpectCRLF())
242*c2c66affSColin Finck                 return FALSE;
243*c2c66affSColin Finck 
244*c2c66affSColin Finck             return TRUE;
245*c2c66affSColin Finck         }
246*c2c66affSColin Finck     }
247*c2c66affSColin Finck     bUnknownMethod = TRUE;
248*c2c66affSColin Finck     return FALSE;
249*c2c66affSColin Finck }
250*c2c66affSColin Finck 
251*c2c66affSColin Finck // GenerelHeader = Cache-Control | Connection | Date | Pragma | Transfer-Encoding |
252*c2c66affSColin Finck //                 Upgrade | Via
GenerelHeader()253*c2c66affSColin Finck BOOL CHttpParser::GenerelHeader()
254*c2c66affSColin Finck {
255*c2c66affSColin Finck     INT i;
256*c2c66affSColin Finck 
257*c2c66affSColin Finck     for (i = 0; i < NUMGENERELS; i++) {
258*c2c66affSColin Finck         if (strcmp(GenerelTable[i], sHeader) == 0) {
259*c2c66affSColin Finck             switch (i) {
260*c2c66affSColin Finck                 case 1: {
261*c2c66affSColin Finck                     //Connection
262*c2c66affSColin Finck                     Expect(':');
263*c2c66affSColin Finck                     Expect(' ');
264*c2c66affSColin Finck                     Skip(13);
265*c2c66affSColin Finck                     ExpectCRLF();
266*c2c66affSColin Finck                     break;
267*c2c66affSColin Finck                 }
268*c2c66affSColin Finck                 default: {
269*c2c66affSColin Finck                     Expect(':');
270*c2c66affSColin Finck                     Expect(' ');
271*c2c66affSColin Finck                     Skip(13);
272*c2c66affSColin Finck                     ExpectCRLF();
273*c2c66affSColin Finck                 }
274*c2c66affSColin Finck             }
275*c2c66affSColin Finck             return TRUE;
276*c2c66affSColin Finck         }
277*c2c66affSColin Finck     }
278*c2c66affSColin Finck     return FALSE;
279*c2c66affSColin Finck }
280*c2c66affSColin Finck 
281*c2c66affSColin Finck // RequestHeader = Accept | Accept-Charset | Accept-Encoding | Accept-Language |
282*c2c66affSColin Finck //                 Authorization | From | Host | If-Modified-Since | If-Match |
283*c2c66affSColin Finck //                 If-None-Match | If-Range | If-Unmodified-Since | Max-Forwards |
284*c2c66affSColin Finck //                 Proxy-Authorization | Range | Referer | User-Agent
RequestHeader()285*c2c66affSColin Finck BOOL CHttpParser::RequestHeader()
286*c2c66affSColin Finck {
287*c2c66affSColin Finck     INT i;
288*c2c66affSColin Finck 
289*c2c66affSColin Finck     for (i = 0; i < NUMREQUESTS; i++) {
290*c2c66affSColin Finck         if (strcmp(RequestTable[i], sHeader) == 0) {
291*c2c66affSColin Finck             switch (i) {
292*c2c66affSColin Finck                 case 0: {
293*c2c66affSColin Finck                     //Accept
294*c2c66affSColin Finck                     Expect(':');
295*c2c66affSColin Finck                     Expect(' ');
296*c2c66affSColin Finck                     Skip(13);
297*c2c66affSColin Finck                     ExpectCRLF();
298*c2c66affSColin Finck                     break;
299*c2c66affSColin Finck                 }
300*c2c66affSColin Finck                 case 2: {
301*c2c66affSColin Finck                     //Accept-Encoding
302*c2c66affSColin Finck                     Expect(':');
303*c2c66affSColin Finck                     Expect(' ');
304*c2c66affSColin Finck                     Skip(13);
305*c2c66affSColin Finck                     ExpectCRLF();
306*c2c66affSColin Finck                     break;
307*c2c66affSColin Finck                 }
308*c2c66affSColin Finck                 case 3: {
309*c2c66affSColin Finck                     //Accept-Language
310*c2c66affSColin Finck                     Expect(':');
311*c2c66affSColin Finck                     Expect(' ');
312*c2c66affSColin Finck                     Skip(13);
313*c2c66affSColin Finck                     ExpectCRLF();
314*c2c66affSColin Finck                     break;
315*c2c66affSColin Finck                 }
316*c2c66affSColin Finck                 case 6: {
317*c2c66affSColin Finck                     //Host
318*c2c66affSColin Finck                     Expect(':');
319*c2c66affSColin Finck                     Expect(' ');
320*c2c66affSColin Finck                     Skip(13);
321*c2c66affSColin Finck                     ExpectCRLF();
322*c2c66affSColin Finck                     break;
323*c2c66affSColin Finck                 }
324*c2c66affSColin Finck                 case 16: {
325*c2c66affSColin Finck                     //User-Agent
326*c2c66affSColin Finck                     Expect(':');
327*c2c66affSColin Finck                     Expect(' ');
328*c2c66affSColin Finck                     Skip(13);
329*c2c66affSColin Finck                     ExpectCRLF();
330*c2c66affSColin Finck                     break;
331*c2c66affSColin Finck                 }
332*c2c66affSColin Finck                 default: {
333*c2c66affSColin Finck                     Expect(':');
334*c2c66affSColin Finck                     Expect(' ');
335*c2c66affSColin Finck                     Skip(13);
336*c2c66affSColin Finck                     ExpectCRLF();
337*c2c66affSColin Finck                     return TRUE;
338*c2c66affSColin Finck                 }
339*c2c66affSColin Finck             }
340*c2c66affSColin Finck             return TRUE;
341*c2c66affSColin Finck         }
342*c2c66affSColin Finck     }
343*c2c66affSColin Finck     return FALSE;
344*c2c66affSColin Finck }
345*c2c66affSColin Finck 
346*c2c66affSColin Finck // EntityHeader = Allow | Content-Base | Content-Encoding | Content-Language |
347*c2c66affSColin Finck //                Content-Length | Content-Location | Content-MD5 |
348*c2c66affSColin Finck //                Content-Range | Content-Type | ETag | Expires |
349*c2c66affSColin Finck //                Last-Modified | extension-header
EntityHeader()350*c2c66affSColin Finck BOOL CHttpParser::EntityHeader()
351*c2c66affSColin Finck {
352*c2c66affSColin Finck     INT i;
353*c2c66affSColin Finck 
354*c2c66affSColin Finck     for (i = 0; i < NUMENTITIES; i++) {
355*c2c66affSColin Finck         if (strcmp(EntityTable[i], sHeader) == 0) {
356*c2c66affSColin Finck             switch (i) {
357*c2c66affSColin Finck                 case 0:
358*c2c66affSColin Finck                 default: {
359*c2c66affSColin Finck                     //cout << "<Entity-Header>: #" << i << endl;
360*c2c66affSColin Finck                     Expect(':');
361*c2c66affSColin Finck                     Expect(' ');
362*c2c66affSColin Finck                     Skip(13);
363*c2c66affSColin Finck                     ExpectCRLF();
364*c2c66affSColin Finck                     return TRUE;
365*c2c66affSColin Finck                 }
366*c2c66affSColin Finck             }
367*c2c66affSColin Finck             return FALSE;
368*c2c66affSColin Finck         }
369*c2c66affSColin Finck     }
370*c2c66affSColin Finck     return FALSE;
371*c2c66affSColin Finck }
372*c2c66affSColin Finck 
373*c2c66affSColin Finck // MessageBody = *OCTET
MessageBody()374*c2c66affSColin Finck BOOL CHttpParser::MessageBody()
375*c2c66affSColin Finck {
376*c2c66affSColin Finck     return FALSE;
377*c2c66affSColin Finck }
378