1
2 /* Web Polygraph http://www.web-polygraph.org/
3 * Copyright 2003-2011 The Measurement Factory
4 * Licensed under the Apache License, Version 2.0 */
5
6 #ifndef POLYGRAPH__RUNTIME_HTTPHDRS_H
7 #define POLYGRAPH__RUNTIME_HTTPHDRS_H
8
9 #include <list>
10
11 #include "xstd/h/iostream.h"
12
13 #include "xstd/Array.h"
14 #include "xstd/NetAddr.h"
15 #include "xstd/Checksum.h"
16 #include "base/AuthPhaseStat.h"
17 #include "base/ObjId.h"
18 #include "runtime/Agent.h"
19 #include "runtime/ObjWorld.h"
20 #include "runtime/XactAbortCoord.h"
21 #include "runtime/HttpVersion.h"
22
23
// Forward declarations: this header only uses these types via
// references and pointers (see MsgHdr::theParsTab and MsgHdr::theCookies).
class HttpCookies;
class MsgHdrParsTab;
26
27 // XXX: make HTTP-independent
28 enum HttpAuthScheme { authNone = AuthPhaseStat::sNone,
29 authBasic = AuthPhaseStat::sBasic,
30 authNtlm = AuthPhaseStat::sNtlm,
31 authNegotiate = AuthPhaseStat::sNegotiate,
32 authFtp = AuthPhaseStat::sFtp };
33
34 class HttpUri {
35 public:
36 HttpUri();
37
38 public:
39 ObjId oid;
40 NetAddr host;
41 const char *pathBuf;
42 int pathLen;
43 };
44
45 struct ByteRange {
46 Size theFirstByte;
47 Size theLastByte;
48 };
49
50 // Encapsulates RFC 2617 "challenge" information
51 // Used to store Proxy-Authenticate and WWW-Authenticate header values
52 class AuthChallenge {
53 public:
AuthChallenge()54 AuthChallenge(): scheme(authNone) {}
55
56 HttpAuthScheme scheme; // auth-scheme in RFC 2616
57 String params; // 1#auth-param in RFC 2616
58 };
59
60 typedef std::list<ByteRange> RangeList;
61
62 // common interface for parsing HTTP requests and responses
63 class MsgHdr {
64 public:
65 typedef bool (MsgHdr::*Parser)(const char *buf, const char *eoh);
66
67 protected:
68 static bool ParseHostInUri(const char *&start, const char *eorl, NetAddr &host);
69 static bool ParseSingleRange(const char *&buf, const char *eoh, Size &firstByte, Size &lastByte);
70 static bool ParseAuthenticate(const char *&buf, const char *eoh, AuthChallenge &auth);
71
72 public:
73 MsgHdr(const MsgHdrParsTab &aTab);
74 virtual ~MsgHdr();
75
76 virtual void reset();
77
78 bool persistentConnection() const;
79 bool knownContentType() const;
80 bool markupContent() const;
81 bool multiRange() const;
82 bool chunkedEncoding() const;
83
84 bool parse(const char *buf, Size sz);
85 bool parseUri(const char *&buf, const char *eoh, HttpUri &uri);
86
87 virtual void store(OLog &log) const;
88
89 void collectCookies(HttpCookies *cookies);
90
91 protected:
92 static void Configure(MsgHdrParsTab &tab);
93 static int AddParser(const String &field, Parser parser, MsgHdrParsTab &tab);
94
95 protected:
96 void parseFields();
97
98 virtual bool parseRLine(const char *buf, const char *eol) = 0;
99
100 public:
101 // can be used by both requests and replies
102 bool parseHttpVersion(const char *&beg, const char *end, HttpVersion &v);
103 bool parseDate(const char *buf, const char *eoh);
104 bool parseContLen(const char *buf, const char *eoh);
105 bool parseContMd5(const char *buf, const char *eoh);
106 bool parseContType(const char *buf, const char *eoh);
107 bool parsePragma(const char *buf, const char *eoh);
108 bool parseCControl(const char *buf, const char *eoh);
109 bool parseXXact(const char *buf, const char *eoh);
110 bool parseXTarget(const char *buf, const char *eoh);
111 bool parseXRemWorld(const char *buf, const char *eoh);
112 bool parseXAbort(const char *buf, const char *eoh);
113 bool parseXPhaseSyncPos(const char *buf, const char *eoh);
114 bool parseConnection(const char *buf, const char *eoh);
115 bool parseTransferEncoding(const char *buf, const char *eoh);
116 bool parseCookie(const char *buf, const char *eoh);
117
118 // these have to be here so we can register them in req
119 virtual bool parseGetReqLine(const char *buf, const char *eorl);
120 virtual bool parseHeadReqLine(const char *buf, const char *eorl);
121 virtual bool parsePostReqLine(const char *buf, const char *eorl);
122 virtual bool parsePutReqLine(const char *buf, const char *eorl);
123 virtual bool parsePatchReqLine(const char *buf, const char *eorl);
124 virtual bool parseHost(const char *buf, const char *eoh);
125 virtual bool parseIms(const char *buf, const char *eoh);
126 virtual bool parseXLocWorld(const char *buf, const char *eoh);
127 virtual bool parseAcceptEncoding(const char *buf, const char *eoh);
128 virtual bool parseExpect(const char *buf, const char *eoh);
129
130 // these have to be here so we can register them in rep
131 virtual bool parseServer(const char *buf, const char *eoh);
132 virtual bool parseProxyAuthenticate(const char *buf, const char *eoh);
133 virtual bool parseWwwAuthenticate(const char *buf, const char *eoh);
134 virtual bool parseLocation(const char *buf, const char *eoh);
135 virtual bool parseLMT(const char *buf, const char *eoh);
136 virtual bool parseExpires(const char *buf, const char *eoh);
137 virtual bool parseContRange(const char *buf, const char *eoh);
138 virtual bool parseRange(const char *buf, const char *eoh);
139
140 public:
141 const MsgHdrParsTab &theParsTab;
142
143 Size theHdrSize;
144 HttpVersion theHttpVersion;
145 Time theDate; // Date: header field
146 Size theContSize;
147 xstd::Checksum theChecksum; // Content-MD5
148
149 UniqId theGroupId;
150 UniqId theXactId;
151 NetAddr theTarget;
152
153 ObjWorld theRemWorld; // remote object world specs
154 XactAbortCoord theAbortCoord; // abort coordinates
155 int thePhaseSyncPos; // index of a ready-to-stop phase
156
157 int theXactFlags;
158
159 enum { kaNo, kaDefault, kaYes } theConnectionKeepAlive;
160 enum { ctUnknown, ctMarkup, ctOther, ctMultiRange } theContType; // content type
161 enum { tcNone, tcChunked, tcIdentity, tcOther } theTransferEncoding;
162 bool isCachable;
163 String theBoundary;
164
165 int theCookieCount; // number of cookies
166 HttpCookies *theCookies; // a place to collect cookies, if needed
167
168 protected:
169 Array<const char *> theFields; // positions of header fields
170 const char *theBufBeg, *theBufEnd;
171 const char *theSrchPtr; // parse buffer iterator
172 enum { ssFirst, ssSkip, ssFound } theSrchState;
173 bool needCookies; // whether cookies should be collected
174 bool createdCookies; // true cookies were created during parsing
175 };
176
177 class ReqHdr: public MsgHdr {
178 public:
179 static void Configure();
180 static void Clean();
181
182 public:
183 ReqHdr();
184
185 virtual void reset();
186
187 virtual void store(OLog &log) const;
188
189 bool expectBody() const;
190 bool acceptedEncoding(int coding) const;
191
192 protected:
193 virtual bool parseRLine(const char *buf, const char *eol);
194
195 bool parseAnyReqLine(const char *buf, const char *eorl);
196
197 virtual bool parseAcceptEncoding(const char *buf, const char *eoh);
198 virtual bool parseGetReqLine(const char *buf, const char *eorl);
199 virtual bool parseHeadReqLine(const char *buf, const char *eorl);
200 virtual bool parsePostReqLine(const char *buf, const char *eorl);
201 virtual bool parsePutReqLine(const char *buf, const char *eorl);
202 virtual bool parsePatchReqLine(const char *buf, const char *eorl);
203 virtual bool parseHost(const char *buf, const char *eoh);
204 virtual bool parseIms(const char *buf, const char *eoh);
205 virtual bool parseXLocWorld(const char *buf, const char *eoh);
206 virtual bool parseRange(const char *buf, const char *eoh);
207 virtual bool parseExpect(const char *buf, const char *eoh);
208
209 protected:
210 static MsgHdrParsTab *TheParsTab;
211
212 public:
213 HttpUri theUri;
214 Time theIms; // If-Modified-Since timestamp
215 ObjWorld theLocWorld; // local world info
216 bool isHealthCheck;
217 bool isAcceptingGzip;
218
219 RangeList theRanges;
220 bool expect100Continue;
221 };
222
223 class RepHdr: public MsgHdr {
224 public:
225 enum StatusCode { scUnknown = -1, sc100_Continue = 100,
226 sc200_OK = 200, sc202_Accepted = 202,
227 sc204_NoContent = 204, sc206_PartialContent = 206,
228 sc300_Choices = 300, sc302_Found = 302, sc303_Other = 303,
229 sc304_NotModified = 304, sc307_TmpRedir = 307,
230 sc401_Unauthorized = 401,
231 sc403_Forbidden = 403,
232 sc406_NotAcceptable = 406,
233 sc407_ProxyAuthRequired = 407,
234 sc416_RequestedRangeNotSatisfiable = 416,
235 sc417_ExpectationFailed = 417 };
236
237 static bool PositiveStatusCode(int code);
238
239 public:
240 static void Configure();
241 static void Clean();
242
243 public:
244 RepHdr();
245
246 virtual void reset();
247
polyHeaders()248 bool polyHeaders() const { return theXactId || theTarget; }
249 bool expectPolyHeaders() const;
250 bool expectBody() const;
251
252 bool redirect() const;
253
254 Time calcLmt() const;
255
256 virtual void store(OLog &log) const;
257
258 bool acceptedEncoding(int coding) const;
259
260 protected:
261 virtual bool parseRLine(const char *buf, const char *eol);
262
263 virtual bool parseServer(const char *buf, const char *eoh);
264 virtual bool parseProxyAuthenticate(const char *buf, const char *eoh);
265 virtual bool parseWwwAuthenticate(const char *buf, const char *eoh);
266 virtual bool parseLocation(const char *buf, const char *eoh);
267 virtual bool parseLMT(const char *buf, const char *eoh);
268 virtual bool parseExpires(const char *buf, const char *eoh);
269 virtual bool parseContRange(const char *buf, const char *eoh);
270
271 protected:
272 static MsgHdrParsTab *TheParsTab;
273
274 public:
275 HttpUri theLocn; // Location: in 3xx responses
276 Time theLMT; // Last-Modified-Time header field
277 Time theExpires; // Expires HTTP header field
278 int theStatus; // HTTP status code
279
280 String theServer; // server agent string
281 AuthChallenge theProxyAuthenticate; // proxy authenticate header
282 AuthChallenge theOriginAuthenticate; // origin authenticate header
283 Size theContRangeFirstByte;
284 Size theContRangeLastByte;
285 Size theContRangeInstanceLength;
286 };
287
// Writes a CR LF pair to os and returns the stream. The signature lets
// crlf be used as a manipulator too: os << crlf. write() is used rather
// than operator<< so that the stream width setting does not affect the
// output.
inline ostream &crlf(ostream &os) { return os.write("\r\n", 2); }
289
290 #endif
291