1
2 /* Web Polygraph http://www.web-polygraph.org/
3 * Copyright 2003-2011 The Measurement Factory
4 * Licensed under the Apache License, Version 2.0 */
5
6 #include "base/polygraph.h"
7
8 #include <limits.h>
9
10 #include "xstd/h/iostream.h"
11 #include <fstream>
12 #include "xstd/h/iomanip.h"
13
14 #include "xstd/Rnd.h"
15 #include "base/RndPermut.h"
16 #include "xstd/Ring.h"
17 #include "tools/IntIntHash.h"
18 #include "base/CmdLine.h"
19 #include "base/opts.h"
20 #include "base/polyOpts.h"
21 #include "xstd/gadgets.h"
22
23
24 class MyOpts: public OptGrp {
25 public:
MyOpts()26 MyOpts():
27 theHelpOpt(this, "help", "list of options"),
28 theVersOpt(this, "version", "package version info"),
29 theOutFileName(this, "out <file>", "redirect console output", "-"),
30 theCachabRatio(this, "cachable <%>", "portion of cachable replies", 0.80),
31 thePublicRatio(this, "public_interest <%>", "portion of URLs shared among all robots", 0.50),
32 theRecurrRatio(this, "recurrence <%>", "probability of a re-visit to a URL", 0.55/0.80),
33 theWorkSetSize(this, "work_set_size <size>","working set size"),
34 theCacheSize(this, "cache_size <size>","cache size", BigSize::MB(100)),
35 theObjSize(this, "obj_size <size>", "average object size", Size::KB(13)),
36 theRobCount(this, "robots <int>", "total number of robots to simulate", 1),
37 thePopModel(this, "pop_model <str>", "popularity model", "unif"),
38 theSimLength(this, "sim_length <int>", "total number of request to simulate", 50000000)
39 {}
40
41 virtual bool validate() const;
42
43 public:
44 HelpOpt theHelpOpt;
45 VersionOpt theVersOpt;
46 StrOpt theOutFileName;
47 DblOpt theCachabRatio;
48 DblOpt thePublicRatio;
49 DblOpt theRecurrRatio;
50 BigSizeOpt theWorkSetSize;
51 BigSizeOpt theCacheSize;
52 SizeOpt theObjSize;
53 IntOpt theRobCount;
54 StrOpt thePopModel;
55 IntOpt theSimLength;
56 } TheOpts;
57
58 class Robot {
59 public:
60 Robot(int anOidOff);
61
62 void step();
63
64 protected:
65 int genOid();
66
67 protected:
68 int theOidOff;
69 int thePrivOidCnt;
70 };
71
72 class Cache {
73 public:
74 Cache(int aCapacity);
75
dhp() const76 double dhp() const { return Percent(theHitCount, theReqCount); }
77 double intervalDhp();
utilp() const78 double utilp() const { return Percent(theSize, theCapacity); }
full() const79 bool full() const { return theSize >= theCapacity; }
capacity() const80 int capacity() const { return theCapacity; }
fill() const81 int fill() const { return theFill; }
reqs() const82 int reqs() const { return theReqCount; }
hash() const83 const IntIntHash &hash() const { return theIndex; }
84
85 void noteObject(int oid);
86
87 protected:
88 void purge();
89
90 protected:
91 int theCapacity;
92 int theSize;
93 int theFill;
94
95 Ring<int> theRepPolicy;
96 IntIntHash theIndex;
97
98 int theHitCount;
99 int theReqCount;
100
101 int theIntvlHitCount;
102 int theIntvlReqCount;
103 };
104
105
106 static
107 struct Server {
108 int theLastOid;
109 } TheServer;
110
111
112 typedef int (*PopModelPtr)(RndGen &rng, int lastOid);
113 static int PopModel(RndGen &rng, int lastOid, int wsCap);
114
115 static Array<Robot*> TheRobots;
116 static Cache *TheCache = 0;
117 static int TheOidLmt = -1; // limit for private and shared oids
118
119 static int TheTotlWorkSetCap = -1;
120 static int ThePrivWorkSubsetCap = -1;
121 static int TheShrdWorkSubsetCap = -1;
122
123 static PopModelPtr ThePopModel = 0;
124 static double ThePopModelParam = 0;
125
126
validate() const127 bool MyOpts::validate() const {
128 if (theWorkSetSize <= 0)
129 cerr << "working set size must be specified" << endl;
130 else
131 return true;
132 return false;
133 }
134
135
136 /* Robot */
137
Robot(int anOidOff)138 Robot::Robot(int anOidOff): theOidOff(anOidOff), thePrivOidCnt(0) {
139 }
140
141 // mimics Client::genOid
genOid()142 int Robot::genOid() {
143 static RndGen rng;
144 const bool publicOid = rng() < TheOpts.thePublicRatio;
145 const bool repeatOid = rng() < TheOpts.theRecurrRatio;
146
147 if (publicOid) {
148 if (repeatOid && TheServer.theLastOid > 0) {
149 return PopModel(rng, TheServer.theLastOid, TheShrdWorkSubsetCap);
150 }
151 Assert(TheServer.theLastOid < TheOidLmt);
152 return ++TheServer.theLastOid;
153 }
154
155 if (repeatOid && thePrivOidCnt > 0)
156 return theOidOff + PopModel(rng, thePrivOidCnt, ThePrivWorkSubsetCap);
157
158 Assert(thePrivOidCnt < TheOidLmt);
159 return theOidOff + (++thePrivOidCnt);
160 }
161
step()162 void Robot::step() {
163 const int oid = genOid();
164 TheCache->noteObject(oid);
165 }
166
167
168 /* Cache */
169
Cache(int aCapacity)170 Cache::Cache(int aCapacity): theCapacity(aCapacity), theSize(0), theFill(0),
171 theRepPolicy(4*aCapacity), theIndex(aCapacity),
172 theHitCount(0), theReqCount(0), theIntvlHitCount(0), theIntvlReqCount(0) {
173 }
174
noteObject(int oid)175 void Cache::noteObject(int oid) {
176 Assert(oid > 0);
177 theReqCount++;
178 theIntvlReqCount++;
179
180 RndGen rng(LclPermut(oid));
181 if (rng() > TheOpts.theCachabRatio)
182 return; // uncachable object
183
184 IntIntHash::Loc loc;
185 if (theIndex.find(oid, loc)) {
186 theHitCount++;
187 theIntvlHitCount++;
188 theIndex[loc]++;
189 } else {
190 const bool wasFull = full();
191 theIndex.addAt(loc, oid, 1);
192 theSize++;
193 theFill++;
194
195 if (wasFull)
196 purge();
197 }
198
199 Assert(!theRepPolicy.full());
200 theRepPolicy.enqueue(oid);
201 }
202
purge()203 void Cache::purge() {
204 Assert(theSize > 0);
205 while (1) {
206 Assert(!theRepPolicy.empty());
207 const int oid = theRepPolicy.dequeue();
208 IntIntHash::Loc loc;
209 Assert(theIndex.find(oid, loc));
210 int &ttl = theIndex[loc];
211 Assert(ttl > 0);
212 if (--ttl == 0) {
213 theIndex.delAt(loc);
214 break;
215 }
216 }
217 theSize--;
218 }
219
intervalDhp()220 double Cache::intervalDhp() {
221 const double res = Percent(theIntvlHitCount, theIntvlReqCount);
222 theIntvlHitCount = theIntvlReqCount = 0;
223 return res;
224 }
225
226 static
configureLogs(int prec)227 void configureLogs(int prec) {
228 if (TheOpts.theOutFileName && TheOpts.theOutFileName != "-")
229 redirectOutput(TheOpts.theOutFileName.cstr());
230
231 configureStream(cout, prec);
232 configureStream(cerr, prec);
233 configureStream(clog, prec);
234 }
235
236
237 /* Pop Models */
238
239 static
UnifPopModel(RndGen & rng,int lastOid)240 int UnifPopModel(RndGen &rng, int lastOid) {
241 return 1 + rng(0, lastOid);
242 }
243
244
Zipf(double alpha,RndGen & rng,int lastOid)245 int Zipf(double alpha, RndGen &rng, int lastOid) {
246 const double rn = rng();
247 return (int)pow(lastOid+1, pow(rn,alpha));
248 }
249
250 static
ZipfPopModel(RndGen & rng,int lastOid)251 int ZipfPopModel(RndGen &rng, int lastOid) {
252 return 1 + lastOid - Zipf(ThePopModelParam, rng, lastOid);
253 }
254
logd(double x)255 inline double logd(double x) { return log(x); }
256
257 static
ZipdPopModel(RndGen & rng,int lastOid)258 int ZipdPopModel(RndGen &rng, int lastOid) {
259 if (lastOid == 1 || ThePopModelParam >= 1)
260 return lastOid;
261 const double alpha = logd(logd(2)/logd(lastOid+1)) / logd(ThePopModelParam);
262 return 1 + lastOid - Zipf(alpha, rng, lastOid);
263 }
264
265 static
PopModel(RndGen & rng,int lastOid,int wsCap)266 int PopModel(RndGen &rng, int lastOid, int wsCap) {
267 const int offset = lastOid > wsCap ? lastOid-wsCap : 0;
268 return offset + ThePopModel(rng, lastOid-offset);
269 }
270
271 // set some general stuff and
272 // propogate cmd line options to corresponding objects
273 static
configure()274 void configure() {
275 configureLogs(2);
276
277 // this is total work set size
278 TheTotlWorkSetCap = 1 + (int) (TheOpts.theWorkSetSize / BigSize(TheOpts.theObjSize));
279
280 // compute private work subsets for robots and "shared" subset
281 ThePrivWorkSubsetCap = 1 + (int)((1-TheOpts.thePublicRatio)*TheTotlWorkSetCap/(int)TheOpts.theRobCount);
282 TheShrdWorkSubsetCap = 1 + (int)(TheOpts.thePublicRatio*TheTotlWorkSetCap);
283
284 // note: we do not adjust subsets for uncachable objects because
285 // robots have no idea and do not care what is cachable
286
287 String pmName = TheOpts.thePopModel;
288 if (const char *p = pmName.chr(':')) {
289 isNum(p+1, ThePopModelParam);
290 pmName = pmName(0, p-pmName.cstr());
291 }
292
293 if (TheOpts.thePopModel == "unif") {
294 ThePopModel = &UnifPopModel;
295 } else
296 if (pmName == "zipf") {
297 ThePopModel = &ZipfPopModel;
298 if (ThePopModelParam <= 0)
299 ThePopModelParam = 1;
300 } else
301 if (pmName == "zipd") {
302 ThePopModel = &ZipdPopModel;
303 if (ThePopModelParam <= 0)
304 ThePopModelParam = 0.5/100;
305 } else {
306 cerr << "unknown popularity model `" << TheOpts.thePopModel << "'" << endl;
307 exit(-1);
308 }
309
310 const int objInCache = 1 + (int) (TheOpts.theCacheSize / BigSize(TheOpts.theObjSize));
311 TheCache = new Cache(objInCache);
312
313 // oid limits
314 TheOidLmt = INT_MAX / (1 + (int)TheOpts.theRobCount);
315
316 for (int i = 0; i < TheOpts.theRobCount; ++i)
317 TheRobots.append(new Robot((i+1)*TheOidLmt));
318 }
319
320 static
report()321 void report() {
322 static int repCnt = 0;
323 if (!repCnt++) {
324 cout << '#'
325 << ' ' << setw(8) << "reqs"
326 << ' ' << setw(8) << "fill#"
327 << ' ' << setw(8) << "fill%"
328 << ' ' << setw(6) << "DHRi"
329 << ' ' << setw(6) << "DHR"
330 << endl;
331 }
332
333 cout << 'i'
334 << ' ' << setw(8) << TheCache->reqs()
335 << ' ' << setw(8) << TheCache->fill()
336 << ' ' << setw(8) << (int)Percent(TheCache->fill(), TheCache->capacity())
337 << ' ' << setw(6) << TheCache->intervalDhp()
338 << ' ' << setw(6) << TheCache->dhp()
339 << endl;
340 }
341
342 static
run()343 void run() {
344 static RndGen rng;
345
346 bool full = TheCache->full();
347 if (!full)
348 cout << "# filling the cache..." << endl;
349
350 const int repCycle = Max(1, TheOpts.theSimLength/1000, TheCache->capacity() / 20);
351 for (int i = TheOpts.theSimLength; i; --i) {
352 // select a random robot
353 Robot *robot = TheRobots[rng(0, TheRobots.count())];
354 robot->step();
355
356 if ((full && abs(i) % repCycle == 1) || (!full && TheCache->full()))
357 report();
358
359 full = TheCache->full();
360 }
361 }
362
main(int argc,char * argv[])363 int main(int argc, char *argv[]) {
364 CmdLine cmd;
365 cmd.configure(Array<OptGrp*>() << &TheOpts);
366 if (!cmd.parse(argc, argv) || !TheOpts.validate())
367 return -1;
368
369 configure();
370 cmd.report(cout);
371
372 cout << "# " << TheOpts.theCacheSize << " cache fits " << TheCache->capacity() << " objects" << endl;
373 cout << "# " << "working set is " << TheOpts.theWorkSetSize << " or " << TheTotlWorkSetCap << " objects" << endl;
374 cout << "# " << "working set split: " << TheRobots.count() << " * " << ThePrivWorkSubsetCap << " + " << TheShrdWorkSubsetCap << " objects" << endl;
375 cout << "# " << "server and robot world limit is " << TheOidLmt << " oids each" << endl;
376
377 run();
378
379 return 0;
380 }
381