/*
    Copyright (C) 2009 Andrew Caudwell (acaudwell@gmail.com)

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version
    3 of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
17
18 #include "apache.h"
19 #include <time.h>
20
21 const char* months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug" , "Sep", "Oct", "Nov", "Dec" };
22 Regex apache_entry_start("^(?:[^ ]+ )?([^ ]+) +[^ ]+ +([^ ]+) +\\[(.*?)\\] +(.*)$");
23 Regex apache_entry_date("(\\d+)/([A-Za-z]+)/(\\d+):(\\d+):(\\d+):(\\d+) ([+-])(\\d+)");
24 Regex apache_entry_request("\"([^ ]+) +([^ ]+) +([^ ]+)\" +([^ ]+) +([^\\s+]+)(.*)");
25 Regex apache_entry_agent(" +\"([^\"]+)\" +\"([^\"]+)\" +\"([^\"]+)\"");
26 Regex apache_hostname_parts("([^.]+)(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?$");
27
ApacheCombinedLog(const std::string & logfile)28 ApacheCombinedLog::ApacheCombinedLog(const std::string& logfile) : RCommitLog(logfile) {
29 }
30
31 //parse apache access.log entry into components
parseCommit(RCommit & commit)32 bool ApacheCombinedLog::parseCommit(RCommit& commit) {
33
34 std::string line;
35 std::vector<std::string> matches;
36
37 if(!logf->getNextLine(line)) return false;
38
39 apache_entry_start.match(line, &matches);
40
41 if(matches.size()!=4) {
42 return 0;
43 }
44
45 //get details
46 commit.username = matches[0];
47
48 std::string request_str = matches[3];
49 std::string datestr = matches[2];
50
51 apache_entry_date.match(datestr, &matches);
52
53 if(matches.size()!=8) {
54 return 0;
55 }
56
57 //parse timestamp
58 int day = atoi(matches[0].c_str());
59 int year = atoi(matches[2].c_str());
60 int hour = atoi(matches[3].c_str());
61 int minute = atoi(matches[4].c_str());
62 int second = atoi(matches[5].c_str());
63
64 int month=0;
65 for(int i=0;i<12;i++) {
66 if(matches[1] == months[i]) {
67 month=i;
68 break;
69 }
70 }
71
72 struct tm time_str;
73 time_str.tm_year = year - 1900;
74 time_str.tm_mon = month;
75 time_str.tm_mday = day;
76 time_str.tm_hour = hour;
77 time_str.tm_min = minute;
78 time_str.tm_sec = second;
79 time_str.tm_isdst = -1;
80
81 commit.timestamp = mktime(&time_str);
82
83 matches.clear();
84 apache_entry_request.match(request_str, &matches);
85
86 if(matches.size() < 5) {
87 return false;
88 }
89
90 std::string rtype = matches[0];
91 std::string file = matches[1];
92 std::string proto = matches[2];
93
94 int code = atoi(matches[3].c_str());
95 int bytes = atol(matches[4].c_str());
96
97 //remove args from url
98 size_t argpos = file.rfind("?");
99 if(argpos != std::string::npos) {
100 file = file.substr(0,argpos);
101 }
102
103 if(file.size()==0) file = "/";
104
105 //name index pages
106 if(file[file.size()-1] == '/') {
107 file += "index.html";
108 }
109
110 std::string action = "A";
111 commit.addFile(file, action);
112
113 std::string refer;
114 std::string agent;
115
116 if(matches.size() > 5) {
117 std::string agentstr = matches[5];
118 matches.clear();
119 apache_entry_agent.match(agentstr, &matches);
120
121 if(matches.size()>1) {
122 refer = matches[0];
123 agent = matches[1];
124 }
125 }
126
127 return true;
128 }
129