1 
2 /* Web Polygraph       http://www.web-polygraph.org/
3  * Copyright 2003-2011 The Measurement Factory
4  * Licensed under the Apache License, Version 2.0 */
5 
6 #include "base/polygraph.h"
7 
8 #include <limits.h>
9 
10 #include "xstd/h/iostream.h"
11 #include <fstream>
12 #include "xstd/h/iomanip.h"
13 
14 #include "xstd/Rnd.h"
15 #include "base/RndPermut.h"
16 #include "xstd/Ring.h"
17 #include "tools/IntIntHash.h"
18 #include "base/CmdLine.h"
19 #include "base/opts.h"
20 #include "base/polyOpts.h"
21 #include "xstd/gadgets.h"
22 
23 
24 class MyOpts: public OptGrp {
25 	public:
MyOpts()26 		MyOpts():
27 			theHelpOpt(this,      "help",             "list of options"),
28 			theVersOpt(this,      "version",          "package version info"),
29 			theOutFileName(this,  "out <file>",       "redirect console output", "-"),
30 			theCachabRatio(this,  "cachable <%>",     "portion of cachable replies", 0.80),
31 			thePublicRatio(this,  "public_interest <%>", "portion of URLs shared among all robots", 0.50),
32 			theRecurrRatio(this,  "recurrence <%>",   "probability of a re-visit to a URL", 0.55/0.80),
33 			theWorkSetSize(this,  "work_set_size <size>","working set size"),
34 			theCacheSize(this,    "cache_size <size>","cache size", BigSize::MB(100)),
35 			theObjSize(this,      "obj_size <size>",  "average object size", Size::KB(13)),
36 			theRobCount(this,     "robots <int>",     "total number of robots to simulate", 1),
37 			thePopModel(this,     "pop_model <str>",  "popularity model", "unif"),
38 			theSimLength(this,    "sim_length <int>", "total number of request to simulate", 50000000)
39 			{}
40 
41 		virtual bool validate() const;
42 
43 	public:
44 		HelpOpt theHelpOpt;
45 		VersionOpt theVersOpt;
46 		StrOpt theOutFileName;
47 		DblOpt theCachabRatio;
48 		DblOpt thePublicRatio;
49 		DblOpt theRecurrRatio;
50 		BigSizeOpt theWorkSetSize;
51 		BigSizeOpt theCacheSize;
52 		SizeOpt theObjSize;
53 		IntOpt theRobCount;
54 		StrOpt thePopModel;
55 		IntOpt theSimLength;
56 } TheOpts;
57 
58 class Robot {
59 	public:
60 		Robot(int anOidOff);
61 
62 		void step();
63 
64 	protected:
65 		int genOid();
66 
67 	protected:
68 		int theOidOff;
69 		int thePrivOidCnt;
70 };
71 
72 class Cache {
73 	public:
74 		Cache(int aCapacity);
75 
dhp() const76 		double dhp() const { return Percent(theHitCount, theReqCount); }
77 		double intervalDhp();
utilp() const78 		double utilp() const { return Percent(theSize, theCapacity); }
full() const79 		bool full() const { return theSize >= theCapacity; }
capacity() const80 		int capacity() const { return theCapacity; }
fill() const81 		int fill() const { return theFill; }
reqs() const82 		int reqs() const { return theReqCount; }
hash() const83 		const IntIntHash &hash() const { return theIndex; }
84 
85 		void noteObject(int oid);
86 
87 	protected:
88 		void purge();
89 
90 	protected:
91 		int theCapacity;
92 		int theSize;
93 		int theFill;
94 
95 		Ring<int> theRepPolicy;
96 		IntIntHash theIndex;
97 
98 		int theHitCount;
99 		int theReqCount;
100 
101 		int theIntvlHitCount;
102 		int theIntvlReqCount;
103 };
104 
105 
106 static
107 struct Server {
108 	int theLastOid;
109 } TheServer;
110 
111 
112 typedef int (*PopModelPtr)(RndGen &rng, int lastOid);
113 static int PopModel(RndGen &rng, int lastOid, int wsCap);
114 
115 static Array<Robot*> TheRobots;
116 static Cache *TheCache = 0;
117 static int TheOidLmt = -1; // limit for private and shared oids
118 
119 static int TheTotlWorkSetCap = -1;
120 static int ThePrivWorkSubsetCap = -1;
121 static int TheShrdWorkSubsetCap = -1;
122 
123 static PopModelPtr ThePopModel = 0;
124 static double ThePopModelParam = 0;
125 
126 
validate() const127 bool MyOpts::validate() const {
128 	if (theWorkSetSize <= 0)
129 		cerr << "working set size must be specified" << endl;
130 	else
131 		return true;
132 	return false;
133 }
134 
135 
136 /* Robot */
137 
Robot(int anOidOff)138 Robot::Robot(int anOidOff):	theOidOff(anOidOff), thePrivOidCnt(0) {
139 }
140 
141 // mimics Client::genOid
genOid()142 int Robot::genOid() {
143 	static RndGen rng;
144 	const bool publicOid = rng() < TheOpts.thePublicRatio;
145 	const bool repeatOid = rng() < TheOpts.theRecurrRatio;
146 
147 	if (publicOid) {
148 		if (repeatOid && TheServer.theLastOid > 0) {
149 			return PopModel(rng, TheServer.theLastOid, TheShrdWorkSubsetCap);
150 		}
151 		Assert(TheServer.theLastOid < TheOidLmt);
152 		return ++TheServer.theLastOid;
153 	}
154 
155 	if (repeatOid && thePrivOidCnt > 0)
156 		return theOidOff + PopModel(rng, thePrivOidCnt, ThePrivWorkSubsetCap);
157 
158 	Assert(thePrivOidCnt < TheOidLmt);
159 	return theOidOff + (++thePrivOidCnt);
160 }
161 
step()162 void Robot::step() {
163 	const int oid = genOid();
164 	TheCache->noteObject(oid);
165 }
166 
167 
168 /* Cache */
169 
Cache(int aCapacity)170 Cache::Cache(int aCapacity): theCapacity(aCapacity), theSize(0), theFill(0),
171 	theRepPolicy(4*aCapacity), theIndex(aCapacity),
172 	theHitCount(0), theReqCount(0), theIntvlHitCount(0), theIntvlReqCount(0) {
173 }
174 
noteObject(int oid)175 void Cache::noteObject(int oid) {
176 	Assert(oid > 0);
177 	theReqCount++;
178 	theIntvlReqCount++;
179 
180 	RndGen rng(LclPermut(oid));
181 	if (rng() > TheOpts.theCachabRatio)
182 		return; // uncachable object
183 
184 	IntIntHash::Loc loc;
185 	if (theIndex.find(oid, loc)) {
186 		theHitCount++;
187 		theIntvlHitCount++;
188 		theIndex[loc]++;
189 	} else {
190 		const bool wasFull = full();
191 		theIndex.addAt(loc, oid, 1);
192 		theSize++;
193 		theFill++;
194 
195 		if (wasFull)
196 			purge();
197 	}
198 
199 	Assert(!theRepPolicy.full());
200 	theRepPolicy.enqueue(oid);
201 }
202 
purge()203 void Cache::purge() {
204 	Assert(theSize > 0);
205 	while (1) {
206 		Assert(!theRepPolicy.empty());
207 		const int oid = theRepPolicy.dequeue();
208 		IntIntHash::Loc loc;
209 		Assert(theIndex.find(oid, loc));
210 		int &ttl = theIndex[loc];
211 		Assert(ttl > 0);
212 		if (--ttl == 0) {
213 			theIndex.delAt(loc);
214 			break;
215 		}
216 	}
217 	theSize--;
218 }
219 
intervalDhp()220 double Cache::intervalDhp() {
221 	const double res = Percent(theIntvlHitCount, theIntvlReqCount);
222 	theIntvlHitCount = theIntvlReqCount = 0;
223 	return res;
224 }
225 
226 static
configureLogs(int prec)227 void configureLogs(int prec) {
228 	if (TheOpts.theOutFileName && TheOpts.theOutFileName != "-")
229 		redirectOutput(TheOpts.theOutFileName.cstr());
230 
231 	configureStream(cout, prec);
232 	configureStream(cerr, prec);
233 	configureStream(clog, prec);
234 }
235 
236 
237 /* Pop Models */
238 
239 static
UnifPopModel(RndGen & rng,int lastOid)240 int UnifPopModel(RndGen &rng, int lastOid) {
241 	return 1 + rng(0, lastOid);
242 }
243 
244 
Zipf(double alpha,RndGen & rng,int lastOid)245 int Zipf(double alpha, RndGen &rng, int lastOid) {
246 	const double rn = rng();
247 	return (int)pow(lastOid+1, pow(rn,alpha));
248 }
249 
250 static
ZipfPopModel(RndGen & rng,int lastOid)251 int ZipfPopModel(RndGen &rng, int lastOid) {
252 	return 1 + lastOid - Zipf(ThePopModelParam, rng, lastOid);
253 }
254 
logd(double x)255 inline double logd(double x) { return log(x); }
256 
257 static
ZipdPopModel(RndGen & rng,int lastOid)258 int ZipdPopModel(RndGen &rng, int lastOid) {
259 	if (lastOid == 1 || ThePopModelParam >= 1)
260 		return lastOid;
261 	const double alpha = logd(logd(2)/logd(lastOid+1)) / logd(ThePopModelParam);
262 	return 1 + lastOid - Zipf(alpha, rng, lastOid);
263 }
264 
265 static
PopModel(RndGen & rng,int lastOid,int wsCap)266 int PopModel(RndGen &rng, int lastOid, int wsCap) {
267 	const int offset = lastOid > wsCap ? lastOid-wsCap : 0;
268 	return offset + ThePopModel(rng, lastOid-offset);
269 }
270 
271 // set some general stuff and
272 // propogate cmd line options to corresponding objects
273 static
configure()274 void configure() {
275 	configureLogs(2);
276 
277 	// this is total work set size
278 	TheTotlWorkSetCap = 1 + (int) (TheOpts.theWorkSetSize / BigSize(TheOpts.theObjSize));
279 
280 	// compute private work subsets for robots and "shared" subset
281 	ThePrivWorkSubsetCap = 1 + (int)((1-TheOpts.thePublicRatio)*TheTotlWorkSetCap/(int)TheOpts.theRobCount);
282 	TheShrdWorkSubsetCap = 1 + (int)(TheOpts.thePublicRatio*TheTotlWorkSetCap);
283 
284 	// note: we do not adjust subsets for uncachable objects because
285 	// robots have no idea and do not care what is cachable
286 
287 	String pmName = TheOpts.thePopModel;
288 	if (const char *p = pmName.chr(':')) {
289 		isNum(p+1, ThePopModelParam);
290 		pmName = pmName(0, p-pmName.cstr());
291 	}
292 
293 	if (TheOpts.thePopModel == "unif") {
294 		ThePopModel = &UnifPopModel;
295 	} else
296 	if (pmName == "zipf") {
297 		ThePopModel = &ZipfPopModel;
298 		if (ThePopModelParam <= 0)
299 			ThePopModelParam = 1;
300 	} else
301 	if (pmName == "zipd") {
302 		ThePopModel = &ZipdPopModel;
303 		if (ThePopModelParam <= 0)
304 			ThePopModelParam = 0.5/100;
305 	} else {
306 		cerr << "unknown popularity model `" << TheOpts.thePopModel << "'" << endl;
307 		exit(-1);
308 	}
309 
310 	const int objInCache = 1 + (int) (TheOpts.theCacheSize / BigSize(TheOpts.theObjSize));
311 	TheCache = new Cache(objInCache);
312 
313 	// oid limits
314 	TheOidLmt = INT_MAX / (1 + (int)TheOpts.theRobCount);
315 
316 	for (int i = 0; i < TheOpts.theRobCount; ++i)
317 		TheRobots.append(new Robot((i+1)*TheOidLmt));
318 }
319 
320 static
report()321 void report() {
322 	static int repCnt = 0;
323 	if (!repCnt++) {
324 		cout << '#'
325 			<< ' ' << setw(8) << "reqs"
326 			<< ' ' << setw(8) << "fill#"
327 			<< ' ' << setw(8) << "fill%"
328 			<< ' ' << setw(6) << "DHRi"
329 			<< ' ' << setw(6) << "DHR"
330 			<< endl;
331 	}
332 
333 	cout << 'i'
334 		<< ' ' << setw(8) << TheCache->reqs()
335 		<< ' ' << setw(8) << TheCache->fill()
336 		<< ' ' << setw(8) << (int)Percent(TheCache->fill(), TheCache->capacity())
337 		<< ' ' << setw(6) << TheCache->intervalDhp()
338 		<< ' ' << setw(6) << TheCache->dhp()
339 		<< endl;
340 }
341 
342 static
run()343 void run() {
344 	static RndGen rng;
345 
346 	bool full = TheCache->full();
347 	if (!full)
348 		cout << "# filling the cache..." << endl;
349 
350 	const int repCycle = Max(1, TheOpts.theSimLength/1000, TheCache->capacity() / 20);
351 	for (int i = TheOpts.theSimLength; i; --i) {
352 		// select a random robot
353 		Robot *robot = TheRobots[rng(0, TheRobots.count())];
354 		robot->step();
355 
356 		if ((full && abs(i) % repCycle == 1) || (!full && TheCache->full()))
357 			report();
358 
359 		full = TheCache->full();
360 	}
361 }
362 
main(int argc,char * argv[])363 int main(int argc, char *argv[]) {
364 	CmdLine cmd;
365 	cmd.configure(Array<OptGrp*>() << &TheOpts);
366 	if (!cmd.parse(argc, argv) || !TheOpts.validate())
367 		return -1;
368 
369 	configure();
370 	cmd.report(cout);
371 
372 	cout << "# " << TheOpts.theCacheSize << " cache fits " << TheCache->capacity() << " objects" << endl;
373 	cout << "# " << "working set is " << TheOpts.theWorkSetSize << " or " << TheTotlWorkSetCap << " objects" << endl;
374 	cout << "# " << "working set split: " << TheRobots.count() << " * " << ThePrivWorkSubsetCap << " + " << TheShrdWorkSubsetCap << " objects" << endl;
375 	cout << "# " << "server and robot world limit is " << TheOidLmt << " oids each" << endl;
376 
377 	run();
378 
379 	return 0;
380 }
381