1 /*
2     Copyright (C) 2009 Andrew Caudwell (acaudwell@gmail.com)
3 
4     This program is free software; you can redistribute it and/or
5     modify it under the terms of the GNU General Public License
6     as published by the Free Software Foundation; either version
7     3 of the License, or (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 
18 #include "apache.h"
19 #include <time.h>
20 
21 const char* months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug" , "Sep", "Oct", "Nov", "Dec" };
22 Regex apache_entry_start("^(?:[^ ]+ )?([^ ]+) +[^ ]+ +([^ ]+) +\\[(.*?)\\] +(.*)$");
23 Regex apache_entry_date("(\\d+)/([A-Za-z]+)/(\\d+):(\\d+):(\\d+):(\\d+) ([+-])(\\d+)");
24 Regex apache_entry_request("\"([^ ]+) +([^ ]+) +([^ ]+)\" +([^ ]+) +([^\\s+]+)(.*)");
25 Regex apache_entry_agent(" +\"([^\"]+)\" +\"([^\"]+)\" +\"([^\"]+)\"");
26 Regex apache_hostname_parts("([^.]+)(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?(?:\\.([^.]+))?$");
27 
ApacheCombinedLog(const std::string & logfile)28 ApacheCombinedLog::ApacheCombinedLog(const std::string& logfile) : RCommitLog(logfile) {
29 }
30 
31 //parse apache access.log entry into components
parseCommit(RCommit & commit)32 bool ApacheCombinedLog::parseCommit(RCommit& commit) {
33 
34     std::string line;
35     std::vector<std::string> matches;
36 
37     if(!logf->getNextLine(line)) return false;
38 
39     apache_entry_start.match(line, &matches);
40 
41     if(matches.size()!=4) {
42         return 0;
43     }
44 
45     //get details
46     commit.username = matches[0];
47 
48     std::string request_str = matches[3];
49     std::string datestr     = matches[2];
50 
51     apache_entry_date.match(datestr, &matches);
52 
53     if(matches.size()!=8) {
54         return 0;
55     }
56 
57     //parse timestamp
58     int day    = atoi(matches[0].c_str());
59     int year   = atoi(matches[2].c_str());
60     int hour   = atoi(matches[3].c_str());
61     int minute = atoi(matches[4].c_str());
62     int second = atoi(matches[5].c_str());
63 
64     int month=0;
65     for(int i=0;i<12;i++) {
66         if(matches[1] == months[i]) {
67             month=i;
68             break;
69         }
70     }
71 
72     struct tm time_str;
73     time_str.tm_year = year - 1900;
74     time_str.tm_mon  = month;
75     time_str.tm_mday = day;
76     time_str.tm_hour = hour;
77     time_str.tm_min = minute;
78     time_str.tm_sec = second;
79     time_str.tm_isdst = -1;
80 
81     commit.timestamp = mktime(&time_str);
82 
83     matches.clear();
84     apache_entry_request.match(request_str, &matches);
85 
86     if(matches.size() < 5) {
87         return false;
88     }
89 
90     std::string rtype = matches[0];
91     std::string file  = matches[1];
92     std::string proto = matches[2];
93 
94     int code      = atoi(matches[3].c_str());
95     int bytes     = atol(matches[4].c_str());
96 
97     //remove args from url
98     size_t argpos = file.rfind("?");
99     if(argpos != std::string::npos) {
100         file = file.substr(0,argpos);
101     }
102 
103     if(file.size()==0) file = "/";
104 
105    //name index pages
106     if(file[file.size()-1] == '/') {
107         file += "index.html";
108     }
109 
110     std::string action = "A";
111     commit.addFile(file, action);
112 
113     std::string refer;
114     std::string agent;
115 
116     if(matches.size() > 5) {
117         std::string agentstr = matches[5];
118         matches.clear();
119         apache_entry_agent.match(agentstr, &matches);
120 
121         if(matches.size()>1) {
122             refer     = matches[0];
123             agent     = matches[1];
124         }
125     }
126 
127     return true;
128 }
129