1 
/* Web Polygraph       http://www.web-polygraph.org/
 * Copyright 2003-2011 The Measurement Factory
 * Licensed under the Apache License, Version 2.0 */
5 
6 #ifndef POLYGRAPH__RUNTIME_HTTPHDRS_H
7 #define POLYGRAPH__RUNTIME_HTTPHDRS_H
8 
9 #include <list>
10 
11 #include "xstd/h/iostream.h"
12 
13 #include "xstd/Array.h"
14 #include "xstd/NetAddr.h"
15 #include "xstd/Checksum.h"
16 #include "base/AuthPhaseStat.h"
17 #include "base/ObjId.h"
18 #include "runtime/Agent.h"
19 #include "runtime/ObjWorld.h"
20 #include "runtime/XactAbortCoord.h"
21 #include "runtime/HttpVersion.h"
22 
23 
// Forward declarations: this header uses these two types only through
// pointers and references, so their full definitions are not needed here.
class MsgHdrParsTab;
class HttpCookies;
26 
27 // XXX: make HTTP-independent
28 enum HttpAuthScheme { authNone = AuthPhaseStat::sNone,
29 	authBasic = AuthPhaseStat::sBasic,
30 	authNtlm = AuthPhaseStat::sNtlm,
31 	authNegotiate = AuthPhaseStat::sNegotiate,
32 	authFtp = AuthPhaseStat::sFtp };
33 
34 class HttpUri {
35 	public:
36 		HttpUri();
37 
38 	public:
39 		ObjId oid;
40 		NetAddr host;
41 		const char *pathBuf;
42 		int pathLen;
43 };
44 
45 struct ByteRange {
46 	Size theFirstByte;
47 	Size theLastByte;
48 };
49 
50 // Encapsulates RFC 2617 "challenge" information
51 // Used to store Proxy-Authenticate and WWW-Authenticate header values
52 class AuthChallenge {
53 	public:
AuthChallenge()54 		AuthChallenge(): scheme(authNone) {}
55 
56 		HttpAuthScheme scheme; // auth-scheme in RFC 2616
57 		String params; // 1#auth-param in RFC 2616
58 };
59 
60 typedef std::list<ByteRange> RangeList;
61 
62 // common interface for parsing HTTP requests and responses
63 class MsgHdr {
64 	public:
65 		typedef bool (MsgHdr::*Parser)(const char *buf, const char *eoh);
66 
67 	protected:
68 		static bool ParseHostInUri(const char *&start, const char *eorl, NetAddr &host);
69 		static bool ParseSingleRange(const char *&buf, const char *eoh, Size &firstByte, Size &lastByte);
70 		static bool ParseAuthenticate(const char *&buf, const char *eoh, AuthChallenge &auth);
71 
72 	public:
73 		MsgHdr(const MsgHdrParsTab &aTab);
74 		virtual ~MsgHdr();
75 
76 		virtual void reset();
77 
78 		bool persistentConnection() const;
79 		bool knownContentType() const;
80 		bool markupContent() const;
81 		bool multiRange() const;
82 		bool chunkedEncoding() const;
83 
84 		bool parse(const char *buf, Size sz);
85 		bool parseUri(const char *&buf, const char *eoh, HttpUri &uri);
86 
87 		virtual void store(OLog &log) const;
88 
89 		void collectCookies(HttpCookies *cookies);
90 
91 	protected:
92 		static void Configure(MsgHdrParsTab &tab);
93 		static int AddParser(const String &field, Parser parser, MsgHdrParsTab &tab);
94 
95 	protected:
96 		void parseFields();
97 
98 		virtual bool parseRLine(const char *buf, const char *eol) = 0;
99 
100 	public:
101 		// can be used by both requests and replies
102 		bool parseHttpVersion(const char *&beg, const char *end, HttpVersion &v);
103 		bool parseDate(const char *buf, const char *eoh);
104 		bool parseContLen(const char *buf, const char *eoh);
105 		bool parseContMd5(const char *buf, const char *eoh);
106 		bool parseContType(const char *buf, const char *eoh);
107 		bool parsePragma(const char *buf, const char *eoh);
108 		bool parseCControl(const char *buf, const char *eoh);
109 		bool parseXXact(const char *buf, const char *eoh);
110 		bool parseXTarget(const char *buf, const char *eoh);
111 		bool parseXRemWorld(const char *buf, const char *eoh);
112 		bool parseXAbort(const char *buf, const char *eoh);
113 		bool parseXPhaseSyncPos(const char *buf, const char *eoh);
114 		bool parseConnection(const char *buf, const char *eoh);
115 		bool parseTransferEncoding(const char *buf, const char *eoh);
116 		bool parseCookie(const char *buf, const char *eoh);
117 
118 		// these have to be here so we can register them in req
119 		virtual bool parseGetReqLine(const char *buf, const char *eorl);
120 		virtual bool parseHeadReqLine(const char *buf, const char *eorl);
121 		virtual bool parsePostReqLine(const char *buf, const char *eorl);
122 		virtual bool parsePutReqLine(const char *buf, const char *eorl);
123 		virtual bool parsePatchReqLine(const char *buf, const char *eorl);
124 		virtual bool parseHost(const char *buf, const char *eoh);
125 		virtual bool parseIms(const char *buf, const char *eoh);
126 		virtual bool parseXLocWorld(const char *buf, const char *eoh);
127 		virtual bool parseAcceptEncoding(const char *buf, const char *eoh);
128 		virtual bool parseExpect(const char *buf, const char *eoh);
129 
130 		// these have to be here so we can register them in rep
131 		virtual bool parseServer(const char *buf, const char *eoh);
132 		virtual bool parseProxyAuthenticate(const char *buf, const char *eoh);
133 		virtual bool parseWwwAuthenticate(const char *buf, const char *eoh);
134 		virtual bool parseLocation(const char *buf, const char *eoh);
135 		virtual bool parseLMT(const char *buf, const char *eoh);
136 		virtual bool parseExpires(const char *buf, const char *eoh);
137 		virtual bool parseContRange(const char *buf, const char *eoh);
138 		virtual bool parseRange(const char *buf, const char *eoh);
139 
140 	public:
141 		const MsgHdrParsTab &theParsTab;
142 
143 		Size theHdrSize;
144 		HttpVersion theHttpVersion;
145 		Time theDate;          // Date: header field
146 		Size theContSize;
147 		xstd::Checksum theChecksum; // Content-MD5
148 
149 		UniqId theGroupId;
150 		UniqId theXactId;
151 		NetAddr theTarget;
152 
153 		ObjWorld theRemWorld; // remote object world specs
154 		XactAbortCoord theAbortCoord; // abort coordinates
155 		int thePhaseSyncPos; // index of a ready-to-stop phase
156 
157 		int theXactFlags;
158 
159 		enum { kaNo, kaDefault, kaYes } theConnectionKeepAlive;
160 		enum { ctUnknown, ctMarkup, ctOther, ctMultiRange } theContType; // content type
161 		enum { tcNone, tcChunked, tcIdentity, tcOther } theTransferEncoding;
162 		bool isCachable;
163 		String theBoundary;
164 
165 		int theCookieCount; // number of cookies
166 		HttpCookies *theCookies; // a place to collect cookies, if needed
167 
168 	protected:
169 		Array<const char *> theFields; // positions of header fields
170 		const char *theBufBeg, *theBufEnd;
171 		const char *theSrchPtr;        // parse buffer iterator
172 		enum { ssFirst, ssSkip, ssFound } theSrchState;
173 		bool needCookies; // whether cookies should be collected
174 		bool createdCookies; // true cookies were created during parsing
175 };
176 
177 class ReqHdr: public MsgHdr {
178 	public:
179 		static void Configure();
180 		static void Clean();
181 
182 	public:
183 		ReqHdr();
184 
185 		virtual void reset();
186 
187 		virtual void store(OLog &log) const;
188 
189 		bool expectBody() const;
190 		bool acceptedEncoding(int coding) const;
191 
192 	protected:
193 		virtual bool parseRLine(const char *buf, const char *eol);
194 
195 		bool parseAnyReqLine(const char *buf, const char *eorl);
196 
197 		virtual bool parseAcceptEncoding(const char *buf, const char *eoh);
198 		virtual bool parseGetReqLine(const char *buf, const char *eorl);
199 		virtual bool parseHeadReqLine(const char *buf, const char *eorl);
200 		virtual bool parsePostReqLine(const char *buf, const char *eorl);
201 		virtual bool parsePutReqLine(const char *buf, const char *eorl);
202 		virtual bool parsePatchReqLine(const char *buf, const char *eorl);
203 		virtual bool parseHost(const char *buf, const char *eoh);
204 		virtual bool parseIms(const char *buf, const char *eoh);
205 		virtual bool parseXLocWorld(const char *buf, const char *eoh);
206 		virtual bool parseRange(const char *buf, const char *eoh);
207 		virtual bool parseExpect(const char *buf, const char *eoh);
208 
209 	protected:
210 		static MsgHdrParsTab *TheParsTab;
211 
212 	public:
213 		HttpUri theUri;
214 		Time theIms;          // If-Modified-Since timestamp
215 		ObjWorld theLocWorld; // local world info
216 		bool isHealthCheck;
217 		bool isAcceptingGzip;
218 
219 		RangeList theRanges;
220 		bool expect100Continue;
221 };
222 
223 class RepHdr: public MsgHdr {
224 	public:
225 		enum StatusCode { scUnknown = -1, sc100_Continue = 100,
226 			sc200_OK = 200, sc202_Accepted = 202,
227 			sc204_NoContent = 204, sc206_PartialContent = 206,
228 			sc300_Choices = 300, sc302_Found = 302, sc303_Other = 303,
229 			sc304_NotModified = 304, sc307_TmpRedir = 307,
230 			sc401_Unauthorized = 401,
231 			sc403_Forbidden = 403,
232 			sc406_NotAcceptable = 406,
233 			sc407_ProxyAuthRequired = 407,
234 			sc416_RequestedRangeNotSatisfiable = 416,
235 			sc417_ExpectationFailed = 417 };
236 
237 		static bool PositiveStatusCode(int code);
238 
239 	public:
240 		static void Configure();
241 		static void Clean();
242 
243 	public:
244 		RepHdr();
245 
246 		virtual void reset();
247 
polyHeaders()248 		bool polyHeaders() const { return theXactId || theTarget; }
249 		bool expectPolyHeaders() const;
250 		bool expectBody() const;
251 
252 		bool redirect() const;
253 
254 		Time calcLmt() const;
255 
256 		virtual void store(OLog &log) const;
257 
258 		bool acceptedEncoding(int coding) const;
259 
260 	protected:
261 		virtual bool parseRLine(const char *buf, const char *eol);
262 
263 		virtual bool parseServer(const char *buf, const char *eoh);
264 		virtual bool parseProxyAuthenticate(const char *buf, const char *eoh);
265 		virtual bool parseWwwAuthenticate(const char *buf, const char *eoh);
266 		virtual bool parseLocation(const char *buf, const char *eoh);
267 		virtual bool parseLMT(const char *buf, const char *eoh);
268 		virtual bool parseExpires(const char *buf, const char *eoh);
269 		virtual bool parseContRange(const char *buf, const char *eoh);
270 
271 	protected:
272 		static MsgHdrParsTab *TheParsTab;
273 
274 	public:
275 		HttpUri theLocn;  // Location: in 3xx responses
276 		Time theLMT;      // Last-Modified-Time header field
277 		Time theExpires;  // Expires HTTP header field
278 		int theStatus;    // HTTP status code
279 
280 		String theServer; // server agent string
281 		AuthChallenge theProxyAuthenticate; // proxy authenticate header
282 		AuthChallenge theOriginAuthenticate; // origin authenticate header
283 		Size theContRangeFirstByte;
284 		Size theContRangeLastByte;
285 		Size theContRangeInstanceLength;
286 };
287 
// Writes the two bytes CR LF to os and returns the stream. The signature
// matches a stream manipulator, so it can also be used as "os << crlf".
// It only writes; it does not call flush().
inline ostream &crlf(ostream &os) { return os.write("\r\n", 2); }
289 
290 #endif
291