1
2 /* Web Polygraph http://www.web-polygraph.org/
3 * Copyright 2003-2011 The Measurement Factory
4 * Licensed under the Apache License, Version 2.0 */
5
6 #include "base/polygraph.h"
7
8 #include "xstd/h/iostream.h"
9 #include <fstream>
10 #include <ctype.h>
11
12 #include "xstd/NetAddr.h"
13 #include "base/AddrParsers.h"
14 #include "base/ForeignTrace.h"
15
16
ForeignTrace()17 ForeignTrace::ForeignTrace(): doIgnoreBad(true) {
18 }
19
configure(const String & aName,bool ignoreBad)20 void ForeignTrace::configure(const String &aName, bool ignoreBad) {
21 theName = aName;
22 doIgnoreBad = ignoreBad;
23 }
24
25
gatherUrls(Array<String * > & urls) const26 int ForeignTrace::gatherUrls(Array<String*> &urls) const {
27 ifstream is(theName.cstr());
28 if (open(is)) {
29 while (String *url = getUrl(is)) {
30 urls.append(url);
31 theMemSize += url->len(); // content
32 theMemSize += SizeOf(String); // overhead
33 theMemSize += SizeOf(String*); // overhead
34 }
35 close(urls.count());
36 }
37 return urls.count();
38 }
39
gatherHosts(Array<NetAddr * > & hosts) const40 int ForeignTrace::gatherHosts(Array<NetAddr*> &hosts) const {
41 ifstream is(theName.cstr());
42 if (open(is)) {
43 while (String *url = getUrl(is)) {
44 // extract host from the URL
45 NetAddr host;
46 const char *urlB = url->cstr();
47 const char *urlE = urlB + url->len();
48 if (const char *hostEnd = SkipHostInUri(urlB, urlE, host)) {
49 hosts.append(new NetAddr(host));
50 theMemSize += hostEnd - urlB; // content
51 theMemSize += SizeOf(NetAddr); // overhead
52 theMemSize += SizeOf(NetAddr*); // overhead
53 }
54 delete url;
55 }
56 close(hosts.count());
57 }
58 return hosts.count();
59 }
60
getUrl(istream & is) const61 String *ForeignTrace::getUrl(istream &is) const {
62 char line[16*1024];
63 while (is.good() && is.getline(line, sizeof(line))) {
64
65 // delete comments
66 if (char *comment = strchr(line, '#'))
67 *comment = '\0';
68
69 // find first URL on the line
70 const char *urlB;
71 if ((urlB = strstr(line, "http://")) ||
72 (urlB = strstr(line, "https://")) ||
73 (urlB = strstr(line, "ftp://"))) {
74 // find URL end (white space or eol)
75 const char *urlE = urlB;
76 while (*urlE && !isspace(*urlE))
77 ++urlE;
78
79 String *url = new String;
80 url->append(urlB, urlE - urlB);
81 ++theEntryCount;
82 return url;
83 }
84
85 // skip leading spaces to avoid warning about empty lines
86 urlB = line;
87 while (*urlB && isspace(*urlB))
88 ++urlB;
89
90 if (*urlB && !doIgnoreBad) {
91 cerr << here <<
92 "error: all trace URLs must follow " <<
93 "http|https|ftp://host/path format; " << endl <<
94 "\tfound: " << urlB << endl;
95 }
96 }
97
98 return 0;
99 }
100
open(istream & is) const101 bool ForeignTrace::open(istream &is) const {
102 theMemSize = 0;
103 theEntryCount = 0;
104 if (!is) {
105 cerr << "failed to open '" << theName << "' trace for reading: " <<
106 Error::Last() << endl;
107 return false;
108 }
109 return true;
110 }
111
close(const int goodCount) const112 void ForeignTrace::close(const int goodCount) const {
113 if (goodCount > 0)
114 clog << "fyi: loaded trace from ";
115 else
116 clog << "warning: empty trace in ";
117
118 clog << "'" << theName << "': " <<
119 "used " << goodCount << " entries out of " << theEntryCount << ", " <<
120 "spent at least " << theMemSize << " bytes" <<
121 endl;
122 }
123