1#
2#   AWFFull - A Webalizer Fork, Full o' features
3#
4#   sample.conf
5#       Sample configuration file
6#
7#   Copyright 1997-2000 by Bradford L. Barrett (brad@mrunix.net)
8#   Copyright (C) 2004- 2008 by Stephen McInerney
9#       (spm@stedee.id.au)
10#
11#   This file is part of AWFFull.
12#
13#   AWFFull is free software: you can redistribute it and/or modify
14#   it under the terms of the GNU General Public License as published by
15#   the Free Software Foundation, either version 3 of the License, or
16#   (at your option) any later version.
17#
18#   AWFFull is distributed in the hope that it will be useful,
19#   but WITHOUT ANY WARRANTY; without even the implied warranty of
20#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21#   GNU General Public License for more details.
22#
23#   You should have received a copy of the GNU General Public License
24#   along with AWFFull.  If not, see <http://www.gnu.org/licenses/>.
25#
26#
27
28# Sample AWFFull configuration file
29#
30# This is a sample configuration file for AWFFull (v3.8.1)
31# Lines starting with pound signs '#' are comment lines and are
32# ignored.  Blank lines are skipped as well.  Other lines are considered
33# as configuration lines, and have the form "ConfigOption  Value" where
34# ConfigOption is a valid configuration keyword, and Value is the value
35# to assign that configuration option.  Invalid keyword/values are
36# ignored, with appropriate warnings being displayed.  There must be
37# at least one space or tab between the keyword and its value.
38#
39# AWFFull will look for a 'default' configuration file
40# "/usr/local/etc/awffull.conf", and if found, use that.
41# the '-c config.file' option can also be used to specify an alternate
42# configuration file. Or multiple configuration files, with multiple -c's.
43
44
45# LogFile defines the web server log file to use.  If not specified
46# here or on the command line, input will default to STDIN.  If
47# the log filename ends in '.gz' (ie: a gzip compressed file), it will
48# be decompressed on the fly as it is being read.
49
50#LogFile        /var/lib/httpd/logs/access_log
51
52# LogType defines the log type being processed.  Normally, AWFFull
53# expects a CLF or Combined web server log as input.  Using this option,
54# you can process ftp logs as well (xferlog as produced by wu-ftp and
55# others), or Squid native logs.
56# Values can be 'auto', 'clf', 'combined', 'ftp', 'domino' or 'squid', with
57# 'auto' the default.
58# The 'auto' value means that AWFFull will try and work out what log format
59# you are sending to it. If no joy, AWFFull will immediately exit.
60
61#LogType        auto
62
63# OutputDir is where you want to put the output files.  This should
64# be a full path name, however relative ones might work as well.
65# If no output directory is specified, the current directory will be used.
66
67#OutputDir      .
68
69# HistoryName allows you to specify the name of the history file produced
70# by AWFFull.  The history file keeps the data for up to 12 months
71# worth of logs, used for generating the main HTML page (index.html).
72# The default is a file named "awffull.hist", stored in the specified
73# output directory.  If you specify just the filename (without a path),
74# it will be kept in the specified output directory.  Otherwise, the path
75# is relative to the output directory, unless absolute (leading /).
76
77#HistoryName    awffull.hist
78
79# Incremental processing allows multiple partial log files to be used
80# instead of one huge one.  Useful for large sites that have to rotate
81# their log files more than once a month.  AWFFull will save its
82# internal state before exiting, and restore it the next time run, in
83# order to continue processing where it left off.  This mode also causes
84# AWFFull to scan for and ignore duplicate records (records already
85# processed by a previous run).  See the README file for additional
86# information.  The value may be 'yes' or 'no', with a default of 'no'.
87# The file 'awffull.current' is used to store the current state data,
88# and is located in the output directory of the program (unless changed
89# with the IncrementalName option below).  Please read at least the section
90# on Incremental processing in the README file before you enable this option.
91
92#Incremental    no
93
94# IncrementalName allows you to specify the filename for saving the
95# incremental data in.  It is similar to the HistoryName option where the
96# name is relative to the specified output directory, unless an absolute
97# filename is specified.  The default is a file named "awffull.current"
98# kept in the normal output directory.  If you don't specify "Incremental"
99# as 'yes' then this option has no meaning.
100
101#IncrementalName        awffull.current
102
103# ReportTitle is the text to display as the title.  The hostname
104# (unless blank) is appended to the end of this string (separated with
105# a space) to generate the final full title string.
106# Default is (for English) "Usage Statistics for".
107
108#ReportTitle    Usage Statistics for
109
110# HostName defines the hostname for the report.  This is used in
111# the title, and is prepended to the URL table items.  This allows
112# clicking on URL's in the report to go to the proper location in
113# the event you are running the report on a 'virtual' web server,
114# or for a server different than the one the report resides on.
115# If not specified here, or on the command line, awffull will
116# try to get the hostname via a uname system call.  If that fails,
117# it will default to "localhost".
118
119#HostName       localhost
120
121# HTMLExtension allows you to specify the filename extension to use
122# for generated HTML pages.  Normally, this defaults to "html", but
123# can be changed for sites who need it (like for PHP embedded pages).
124
125#HTMLExtension  html
126
127# PageType tells AWFFull which types of URL's you consider to be
128# a 'page'.  Most people count html and cgi documents as pages,
129# but not images or audio files.  If no types are specified,
130# defaults will be used ('htm', 'html', 'cgi' and HTMLExtension
131# if different for web logs, 'txt' for ftp logs).
132# Putting the more likely page types first in the list should increase the
133# speed of a run.
134# Do not use wildcards here; they will not work.
135
136PageType        htm
137PageType        html
138PageType        php
139#PageType       pl
140#PageType       cfm
141#PageType       pdf
142#PageType       txt
143#PageType       cgi
144
145# NotPageType is the direct and incompatible opposite of PageType.
146# You can use one set or the other, but not both.
147# PageType specifies what *is* a Page, NotPageType specifies what
148# *isn't*, and hence by implication, everything else is a page.
149# Neither method is more or less correct than the other. It comes down
150# to which is more accurate for *your* site.
151# As a general rule, do not add the "." or use any wildcards.
152# There are some assumed internal optimisations that may otherwise
153# break.
154# Those who understand pcre's would do well to examine the source
155# of parser.c if they wish to extract greater flexibility from the
156# below.
157
158#NotPageType     gif
159#NotPageType     css
160#NotPageType     js
161#NotPageType     jpg
162#NotPageType     ico
163#NotPageType     png
164
165# CSSFilename is used to set the name of the CSS file to use in conjunction
166# with the generated html. An existing file is *not* overwritten, so feel free
167# to make your own changes to the default file.
168
169#CSSFilename	awffull.css
170
171# UseHTTPS should be used if the analysis is being run on a
172# secure server, and links to urls should use 'https://' instead
173# of the default 'http://'.  If you need this, set it to 'yes'.
174# Default is 'no'.  This only changes the behaviour of the 'Top
175# URL's' table.
176
177#UseHTTPS       no
178
179# HTMLPre defines HTML code to insert at the very beginning of the
180# file.  Default is the DOCTYPE line shown below.  Max line length
181# is 80 characters, so use multiple HTMLPre lines if you need more.
182
183#HTMLPre <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
184
185# HTMLHead defines HTML code to insert within the <HEAD></HEAD>
186# block, immediately after the <TITLE> line.  Maximum line length
187# is 80 characters, so use multiple lines if needed.
188
189#HTMLHead <META NAME="author" CONTENT="AWFFull">
190
191# HTMLBody defines the HTML code to be inserted, starting with the
192# <BODY> tag.  If not specified, the default is shown below.  If
193# used, you MUST include your own <BODY> tag as the first line.
194# Maximum line length is 80 char, use multiple lines if needed.
195
196#HTMLBody <BODY BGCOLOR="#E8E8E8" TEXT="#000000" LINK="#0000FF" VLINK="#FF0000">
197
198# HTMLPost defines the HTML code to insert immediately before the
199# first <HR> on the document, which is just after the title and
200# "summary period"-"Generated on:" lines.  If anything, this should
201# be used to clean up in case an image was inserted with HTMLBody.
202# As with HTMLHead, you can define as many of these as you want and
203# they will be inserted in the output stream in order of appearance.
204# Max string size is 80 characters.  Use multiple lines if you need to.
205
206#HTMLPost       <BR CLEAR="all">
207
208# HTMLTail defines the HTML code to insert at the bottom of each
209# HTML document, usually to include a link back to your home
210# page or insert a small graphic.  It is inserted as a table
211# data element (ie: <TD> your code here </TD>) and is right
212# aligned with the page.  Max string size is 80 characters.
213
214#HTMLTail <IMG SRC="yourlogo.png" ALT="Company XYZ!">
215
216# HTMLEnd defines the HTML code to add at the very end of the
217# generated files.  It defaults to what is shown below.  If
218# used, you MUST specify the </BODY> and </HTML> closing tags
219# as the last lines.  Max string length is 80 characters.
220
221#HTMLEnd </BODY></HTML>
222
223# TimeMe allows you to force the display of timing information
224# at the end of processing.  A value of 'yes' will force the
225# timing information to be displayed.  A value of 'no' has no
226# effect.
227
228#TimeMe         no
229
230# GMTTime allows reports to show GMT (UTC) time instead of local
231# time.  Default is to display the time the report was generated
232# in the timezone of the local machine, such as EDT or PST.  This
233# keyword allows you to have times displayed in UTC instead.  Use
234# only if you really have a good reason, since it will probably
235# screw up the reporting periods by however many hours your local
236# time zone is off of GMT.
237
238#GMTTime        no
239
240# FoldSeqErr forces AWFFull to ignore sequence errors.
241# This is useful for Netscape and other web servers that cache
242# the writing of log records and do not guarantee that they
243# will be in chronological order.  The use of the FoldSeqErr
244# option will cause out of sequence log records to be treated
245# as if they had the same time stamp as the last valid record.
246# Default is to ignore out of sequence log records.
247
248#FoldSeqErr     no
249
250# VisitTimeout allows you to set the default timeout for a visit
251# (sometimes called a 'session').  The default is 30 minutes,
252# which should be fine for most sites.
253# Visits are determined by looking at the time of the current
254# request, and the time of the last request from the site.  If
255# the time difference is greater than the VisitTimeout value, it
256# is considered a new visit, and visit totals are incremented.
257# Value is the number of seconds to timeout (default=1800=30min)
258
259#VisitTimeout   1800
260
261# IgnoreHist shouldn't be used in a config file, but it is here
262# just because it might be useful in certain situations.  If the
263# history file is ignored, the main "index.html" file will only
264# report on the current log files contents.  Useful only when you
265# want to reproduce the reports from scratch.  USE WITH CAUTION!
266# Valid values are "yes" or "no".  Default is "no".
267
268#IgnoreHist     no
269
270# TrackPartialRequests is used to track 206 codes. This gives two
271# additional columns in the Top URLs tables.
272# The first to "Hits" counts the number of partial requests
273# The second to "Volume" counts the volume in partial requests
274# This option is more of use to those with lots of PDF's.
275
276#TrackPartialRequests    no
277
278# CountryGraph allows the usage by country graph to be disabled.
279# Values can be 'yes' or 'no', default is 'yes'.
280
281#CountryGraph   yes
282
283# GeoIP enables or disables the use of the GeoIP capability for more
284# accurate detection of countries. Default is 'no'.
285# NOTE! Do not enable GeoIP if you analyse files that have had the IP Address
286# translated to a Fully Qualified Host Name.
287# Use either raw IP Addresses and GeoIP, or Names and disable GeoIP.
288# ie. Don't use GeoIP AND DNSHistory.
289
290#GeoIP          no
291
292# GeoIPDatabase is the location of the GeoIP database file. Default is
293# '/usr/local/share/GeoIP/GeoIP.dat', which is where a default GeoIP
294# install will put it. Note that the database is updated monthly.
295# For the details see: http://www.maxmind.com/app/geoip_country
296
297#GeoIPDatabase   /usr/local/share/GeoIP/GeoIP.dat
298
299# FlagsLocation will enable the display of country flags in the country
300# table. The path is that for a webserver, not file system. Can be
301# relative or complete. The trailing slash is not necessary.
302
303#FlagsLocation		flags
304
305# DailyGraph and DailyStats allow the daily statistics graph
306# and statistics table to be disabled (not displayed).  Values
307# may be "yes" or "no". Default is "yes".
308
309#DailyGraph     yes
310#DailyStats     yes
311
312# HourlyGraph and HourlyStats allow the hourly statistics graph
313# and statistics table to be disabled (not displayed).  Values
314# may be "yes" or "no". Default is "yes".
315
316#HourlyGraph    yes
317#HourlyStats    yes
318
319# TopURLsbyHitsGraph / TopURLsbyVolGraph - Display pie charts of the top URLs by hits / by volume
320#TopURLsbyHitsGraph      yes
321#TopURLsbyVolGraph       yes
322
323# TopExitPagesGraph - Display Top Exit Pages Pie Chart
324#   no     for don't display
325#   hits   for by hits
326#   visits for by visits
327#TopExitPagesGraph    visits
328
329# TopEntryPagesGraph - Display Top Entry Pages Pie Chart
330#   no     for don't display
331#   hits   for by hits
332#   visits for by visits
333#TopEntryPagesGraph    visits
334
335# TopSitesbyPagesGraph - Display a pie chart of the Top Sites by Page Impressions
336#TopSitesbyPagesGraph    yes
337
338# TopSitesbyVolGraph - Display a pie chart of the Top Sites by Volume
339#TopSitesbyVolGraph    yes
340
341# TopAgentsGraph - Display a pie chart of the Top User Agents (by pages)
342#TopAgentsGraph         yes
343
344# GraphLegend allows the color coded legends to be turned on or off
345# in the graphs.  The default is for them to be displayed.  This only
346# toggles the color coded legends, the other legends are not changed.
347# If you think they are hideous and ugly, say 'no' here :)
348
349#GraphLegend    yes
350
351# GraphLines allows you to have index lines drawn behind the graphs.
352# Anything other than "no" will enable the lines.
353
354#GraphLines     yes
355
356# YearlySubtotals will display the subtotal for a given year in the main
357# page. This is in addition to the Grand Total of all years.
358
359#YearlySubtotals         no
360
361# The "Top" options below define the number of entries for each table.
362# Defaults are Sites=30, URL's=30, Referrers=30, Agents=15 and
363# Countries=30. TopKSites and TopKURLs (by KByte tables) both default
364# to 10, as do the top entry/exit tables (TopEntry/TopExit).  The top
365# search strings and user names default to 20.  Tables may be disabled
366# by using zero (0) for the value.
367# Top404Errors displays a table of error requests, and the corresponding
368# referring URL.
369
370#TopSites        30
371#TopKSites       10
372#TopURLs         30
373#TopKURLs        10
374#TopReferrers    30
375#TopAgents       15
376#TopCountries    30
377#TopEntry        10
378#TopExit         10
379#TopSearch       20
380#TopUsers        20
381#Top404Errors    0
382
383# The All* keywords allow the display of all URL's, Sites, Referrers
384# User Agents, Search Strings and User names.  If enabled, a separate
385# HTML page will be created, and a link will be added to the bottom
386# of the appropriate "Top" table.  There are a couple of conditions
387# for this to occur.  First, there must be more items than will fit
388# in the "Top" table (otherwise it would just be duplicating what is
389# already displayed).  Second, the listing will only show those items
390# that are normally visible, which means it will not show any hidden
391# items.  Grouped entries will be listed first, followed by individual
392# items.  The value for these keywords can be either 'yes' or 'no',
393# with the default being 'no'.  Please be aware that these pages can
394# be quite large in size, particularly the sites page,  and separate
395# pages are generated for each month, which can consume quite a lot
396# of disk space depending on the traffic to your site.
397# All404Errors displays a table of error requests, and the corresponding
398# referring URL.
399
400#AllSites       no
401#AllURLs        no
402#AllReferrers   no
403#AllAgents      no
404#AllSearchStr   no
405#AllUsers       no
406#All404Errors   no
407
408# AWFFull normally strips the string 'index.' off the end of
409# URL's in order to consolidate URL totals.  For example, the URL
410# /somedir/index.html is turned into /somedir/ which is really the
411# same URL.  This option allows you to specify additional strings
412# to treat in the same way.  You don't need to specify 'index.' as
413# it is always scanned for by AWFFull, this option is just to
414# specify _additional_ strings if needed.  If you don't need any,
415# don't specify any as each string will be scanned for in EVERY
416# log record... A bunch of them will degrade performance.  Also,
417# the string is scanned for anywhere in the URL, so a string of
418# 'home' would turn the URL /somedir/homepages/brad/home.html into
419# just /somedir/ which is probably not what was intended.
420
421#IndexAlias     home.htm
422#IndexAlias     homepage.htm
423
424# The opposite (in a way) of IndexAlias is IgnoreIndexAlias.
425# This will STOP any URL variable stripping, as well as ignoring the
426# default "index." setting, or any that you set above.
427
428#IgnoreIndexAlias no
429
430
431# The Hide*, Group*, Ignore* and Include* keywords allow you to
432# change the way Sites, URL's, Referrers, User Agents and User names
433# are manipulated.  The Ignore* keywords will cause AWFFull to
434# completely ignore records as if they didn't exist (and thus not
435# counted in the main site totals).  The Hide* keywords will prevent
436# things from being displayed in the 'Top' tables, but will still be
437# counted in the main totals.  The Group* keywords allow grouping
438# similar objects as if they were one.  Grouped records are displayed
439# in the 'Top' tables and can optionally be displayed in BOLD and/or
440# shaded. Groups cannot be hidden, and are not counted in the main
441# totals. The Group* options do not, by default, hide all the items
442# that it matches.  If you want to hide the records that match (so just
443# the grouping record is displayed), follow with an identical Hide*
444# keyword with the same value.  (see example below)  In addition,
445# Group* keywords may have an optional label which will be displayed
446# instead of the keywords value.  The label should be separated from
447# the value by at least one 'white-space' character, such as a space
448# or tab.
449#
450# The value can have either a leading or trailing '*' wildcard
451# character.  If no wildcard is found, a match can occur anywhere
452# in the string. Given a string "www.yourmama.com", the values "your",
453# "*mama.com" and "www.your*" will all match.
454
455# Your own site should be hidden
456#HideSite       *mrunix.net
457#HideSite       localhost
458
459# Your own site gives most referrals
460#HideReferrer   mrunix.net/
461
462# This one hides non-referrers ("-" Direct requests)
463#HideReferrer   Direct Request
464
465# Usually you want to hide these from the 'Top' tables (still counted in totals)
466HideURL         *.gif
467HideURL         *.GIF
468HideURL         *.jpg
469HideURL         *.JPG
470HideURL         *.png
471HideURL         *.PNG
472HideURL         *.ra
473
474# Hiding agents is kind of futile
475#HideAgent      RealPlayer
476
477# You can also hide based on authenticated user name
478#HideUser       root
479#HideUser       admin
480
481# Grouping options
482#GroupURL       /cgi-bin/*      CGI Scripts
483#GroupURL       /images/*       Images
484
485#GroupSite      *.aol.com
486#GroupSite      *.compuserve.com
487
488#GroupReferrer  yahoo.com/      Yahoo!
489#GroupReferrer  excite.com/     Excite
490#GroupReferrer  infoseek.com/   InfoSeek
491#GroupReferrer  webcrawler.com/ WebCrawler
492
493#GroupUser      root            Admin users
494#GroupUser      admin           Admin users
495#GroupUser      wheel           Admin users
496
497# The following is a great way to get an overall total
498# for browsers, and not display all the detail records.
499# (You should use MangleAgent to refine further...)
500#
501# Simplified browser list for Webalizer. Copy & paste in awffull.conf,
502#    replacing the original list.
503#
504# Longer version in http://griho.udl.es/webalizer/groupagent.txt
505# Full version in http://griho.udl.es/webalizer/webalizer.conf.txt
506#
507# Version: 1.1 14/May/2005
508#
509
510# GroupAndHideAgent is shorthand for a GroupAgent line followed by a HideAgent line
511GroupAndHideAgent       Googlebot                       Spider: Googlebot
512GroupAndHideAgent       msnbot*                         Spider: MSNBot
513
514GroupAndHideAgent       AppleWebKit/            Browser: Safari (OSX)
515GroupAndHideAgent       Camino                  Browser: Camino (OSX)
516GroupAndHideAgent       Epiphany                Browser: Epiphany (Gentoo)
517GroupAndHideAgent       Firebird/               Browser: Firebird
518GroupAndHideAgent       Firefox/                Browser: Firefox
519GroupAndHideAgent       Galeon/                 Browser: Galeon
520GroupAndHideAgent       Konqueror/              Browser: Konqueror
521GroupAndHideAgent       Netscape6/              Browser: Netscape 6
522GroupAndHideAgent       Netscape/7              Browser: Netscape 7
523GroupAndHideAgent       Netscape/8              Browser: Netscape 8
524GroupAndHideAgent       rv:1.                   Browser: Mozilla 1.x
525GroupAndHideAgent       Opera                   Browser: Opera
526GroupAndHideAgent       Mozilla/1               Browser: Netscape v1.xx
527GroupAndHideAgent       Mozilla/2               Browser: Netscape v2.xx
528GroupAndHideAgent       Mozilla/3.04Gold        Browser: Netscape 3.04 Gold
529GroupAndHideAgent       Mozilla/3               Browser: Netscape v3.xx
530GroupAndHideAgent       Mozilla/4.03            Browser: Netscape 4.03
531GroupAndHideAgent       Mozilla/4.04            Browser: Netscape 4.04
532GroupAndHideAgent       Mozilla/4.05            Browser: Netscape 4.05
533GroupAndHideAgent       Mozilla/4.06            Browser: Netscape 4.06
534GroupAndHideAgent       Mozilla/4.08            Browser: Netscape 4.08
535GroupAndHideAgent       Mozilla/4.5             Browser: Netscape 4.5
536GroupAndHideAgent       Mozilla/4.61            Browser: Netscape 4.6 (Mac/WinNT)
537GroupAndHideAgent       Mozilla/4.6             Browser: Netscape 4.6 (Win95/Win98)
538GroupAndHideAgent       Mozilla/4.72            Browser: Netscape 4.72
539GroupAndHideAgent       Mozilla/4.73            Browser: Netscape 4.73
540GroupAndHideAgent       Mozilla/4.75            Browser: Netscape 4.75
541GroupAndHideAgent       Mozilla/4.76            Browser: Netscape 4.76
542GroupAndHideAgent       Mozilla/4.77            Browser: Netscape 4.77
543GroupAndHideAgent       Mozilla/4.78            Browser: Netscape 4.78
544GroupAndHideAgent       Mozilla/4.79            Browser: Netscape 4.79
545GroupAndHideAgent       Mozilla/4.7             Browser: Netscape 4.7
546GroupAndHideAgent       Mozilla/4.8             Browser: Netscape 4.8
547GroupAndHideAgent       Mozilla/5.0             Browser: Mozilla/5.0 compatible (other)
548GroupAndHideAgent       "compatible; MSIE 6.0"  Browser: Internet Explorer 6.0 (Win)
549GroupAndHideAgent       "compatible; MSIE 7.01" Spambot: Pretends to be MSIE 7.01
550GroupAndHideAgent       "compatible; MSIE 7.0"  Browser: Internet Explorer 7.0 (Win)
551GroupAndHideAgent       "compatible; MSIE 5.5"  Browser: Internet Explorer 5.5 (Win)
552GroupAndHideAgent       "compatible; MSIE 5.01" Browser: Internet Explorer 5.01
553# This 4.0 entry would match Mozilla/4.0, which every MSIE version sends, so leave it commented out
554##GroupAgent    4.0                     Browser: Internet Explorer 4.0
555##HideAgent     4.0
556GroupAndHideAgent       4.5                     Browser: Internet Explorer 4.5
557GroupAndHideAgent       5.0                     Browser: Internet Explorer 5.0
558GroupAndHideAgent       5.12                    Browser: Internet Explorer 5.12 (Mac)
559GroupAndHideAgent       5.13                    Browser: Internet Explorer 5.13 (Mac)
560GroupAndHideAgent       5.14                    Browser: Internet Explorer 5.14 (Mac)
561GroupAndHideAgent       5.15                    Browser: Internet Explorer 5.15 (Mac)
562GroupAndHideAgent       5.16                    Browser: Internet Explorer 5.16 (Mac)
563GroupAndHideAgent       5.17                    Browser: Internet Explorer 5.17 (Mac)
564GroupAndHideAgent       5.21                    Browser: Internet Explorer 5.21 (Mac)
565GroupAndHideAgent       5.22                    Browser: Internet Explorer 5.22 (Mac)
566GroupAndHideAgent       5.23                    Browser: Internet Explorer 5.23 (Mac)
567GroupAndHideAgent       "compatible; MSIE 5.0"  Browser: Internet Explorer 5.0
568GroupAndHideAgent       "compatible; MSIE 4.5"  Browser: Internet Explorer 4.5
569GroupAndHideAgent       3.0B                    Browser: Internet Explorer 3.0B (win95)
570GroupAndHideAgent       3.01                    Browser: Internet Explorer 3.01 (win95)
571GroupAndHideAgent       3.0                     Browser: Internet Explorer 3.0  (win95)
572GroupAndHideAgent       4.01                    Browser: Internet Explorer 4.01
573
574# we comment MSIE because many agents use it in their name to disguise as Internet Explorer
575#####GroupAgent MSIE            Browser: Internet Explorer (unknown version)
576#####HideAgent  MSIE
577
578
579# HideAllSites allows forcing individual sites to be hidden in the
580# report.  This is particularly useful when used in conjunction
581# with the "GroupDomains" feature, but could be useful in other
582# situations as well, such as when you only want to display grouped
583# sites (with the GroupSite keywords...).  The value for this
584# keyword can be either 'yes' or 'no', with 'no' the default,
585# allowing individual sites to be displayed.
586
587#HideAllSites   no
588
589
590# The GroupDomains keyword allows you to group individual host names
591# into their respective domains.  The value specifies the level of
592# grouping to perform, and can be thought of as 'the number of dots'
593# that will be displayed.  For example, if a visiting host is named
594# cust1.tnt.mia.uu.net, a domain grouping of 1 will result in just
595# "uu.net" being displayed, while a 2 will result in "mia.uu.net".
596# The default value of zero disables this feature.  Domains will only
597# be grouped if they do not match any existing "GroupSite" records,
598# which allows overriding this feature with your own if desired.
599
600#GroupDomains   0
601
602# The GroupShading allows grouped rows to be shaded in the report.
603# Useful if you have lots of groups and individual records that
604# intermingle in the report, and you want to differentiate the group
605# records a little more.  Value can be 'yes' or 'no', with 'yes'
606# being the default.
607
608#GroupShading   yes
609
610# GroupHighlight allows the group record to be displayed in BOLD.
611# Can be either 'yes' or 'no' with the default 'yes'.
612
613#GroupHighlight yes
614
615
616# Segmenting - segXXX
617# Segmenting is a bit like the Ignore* and Include* keywords. Where it
618# differs is in "remembering". Such that, as a "session" moves away from
619# the original condition, that session is still tracked.
620# So if you segment on a referral from Google, only sessions that were
621# referred to the site from Google will be tracked. Even as they access
622# other pages within the site.
623# eg. Google -> Site Page 1 -> Site Page 2 -> Site Page 3
624# Whereas Ignore/Include would only filter the first interaction.
625# eg. Google -> Site Page 1
626#
627# By "session" it is meant that the time limitation of a session (typically
628# 30 minutes timeout) will impact. So in the above example from Google, if
629# the last step (from Page 2 to Page 3) occurred 31+ minutes after the Page 1
630# to Page 2 transition, then this final step would NOT be included. The trail
631# would be:
632#   Google -> Site Page 1 -> Site Page 2
633#
634# Please do be aware that currently AWFFull uses IP Addresses to determine
635# the continuation of a given session. This will be most flawed if you have
636# a user population that sits behind corporate firewalls, or ISP Proxies.
637# To mention two major problem areas.
638#
639# Why do Segmenting?
640# http://judah.webanalyticsdemystified.com/2007/11/a-few-tips-on-web-analytics-segmentation.html
641#  "Segment analysis will tell you different things about your audience than
642#    you will realize from studying overall population metrics."
643#  "The goal of segmentation is to maximize future value of that segment by
644#    optimizing your marketing mix."
645# With apologies to Judah for mixing his phrase order around. :-)
646
647# Segment by Country
648# Only track sessions that come from the following countries.
649# This will be determined by:
650# 1. Use of AssignToCountry overrides
651# 2. GeoIP lookups if so configured and enabled
652# 3. Hostname TLD. eg .au
653# The third option is generally going to be the worst for accuracy.
654# We have plenty of Australian IP addresses that are .com or .net etc.
655# It is strongly advised to enable GeoIP if you wish to use this option.
656#SegCountry AU
657#SegCountry US
658#SegCountry BR
659
660# Segment by Referer
661# Only track sessions that originated from the following referrers.
662# NOTE!!!! SegReferer only works against the HOST name. Not the full URL.
663#SegReferer *google.com.au
664#SegReferer *yahoo.com.au
665#SegReferer ninemsn
666
667
668# The Ignore* keywords allow you to completely ignore log records based
669# on hostname, URL, user agent, referrer or user name.  I hesitated in
670# adding these, since the Webalizer was designed to generate _accurate_
671# statistics about a web server's performance.  By choosing to ignore
672# records, the accuracy of reports becomes skewed, negating why I wrote
673# this program in the first place.  However, due to popular demand, here
674# they are.  Use the same as the Hide* keywords, where the value can have
675# a leading or trailing wildcard '*'.  Use at your own risk ;)
676
677#IgnoreSite     bad.site.net
678#IgnoreURL      /test*
679#IgnoreReferrer file:/*
680#IgnoreAgent    RealPlayer
681#IgnoreUser     root
682
683
684# The Include* keywords allow you to force the inclusion of log records
685# based on hostname, URL, user agent, referrer or user name.  They take
686# precedence over the Ignore* keywords.  Note: Using Ignore/Include
687# combinations to selectively process parts of a web site is _extremely
688# inefficient_!!! Avoid doing so if possible (ie: grep the records to a
689# separate file if you really want that kind of report).
690
691# Example: Only show stats on Joe User's pages...
692#IgnoreURL      *
693#IncludeURL     /~joeuser*
694
695# Or based on an authenticated user name
696#IgnoreUser     *
697#IncludeUser    someuser
698
699
700# The MangleAgents keyword allows you to specify how much, if any, AWFFull
701# should mangle user agent names.  This allows several levels of detail
702# to be produced when reporting user agent statistics.  There are six
703# levels that can be specified, which define different levels of detail
704# suppression.  Level 5 shows only the browser name (MSIE or Mozilla)
705# and the major version number.  Level 4 adds the minor version number
706# (single decimal place).  Level 3 displays the minor version to two
707# decimal places.  Level 2 will add any sub-level designation (such
708# as Mozilla/3.01Gold or MSIE 3.0b).  Level 1 will attempt to also add
709# the system type if it is specified.  The default Level 0 displays the
710# full user agent field without modification and produces the greatest
711# amount of detail.  User agent names that can't be mangled will be
712# left unmodified.
713
714#MangleAgents    0
715
716# The SearchEngine keywords allow specification of search engines and
717# their query strings on the URL.  These are used to locate and report
718# what search strings are used to find your site.  The first word is
719# a substring to match in the referrer field that identifies the search
720# engine, and the second is the URL variable used by that search engine
721# to define its search terms.
722
723SearchEngine    google.         q=
724SearchEngine    yahoo.          p=
725SearchEngine    msn.            q=
726SearchEngine    search.aol      query=
727SearchEngine    altavista.      q=
728SearchEngine    lycos.          query=
729SearchEngine    hotbot.         query=
730SearchEngine    alltheweb.      query=
731SearchEngine    infoseek.       qt=
732SearchEngine    webcrawler      searchText=
733SearchEngine    excite          search=
734SearchEngine    netscape.       query=
735SearchEngine    ask.com         q=
736SearchEngine    webwombat.      ix=
737SearchEngine    earthlink.      q=
738SearchEngine    search.comcast. q=
739SearchEngine    search.mywebsearch.     searchfor=
740SearchEngine    reference.com   q=
741SearchEngine    mamma.com       query=
742# Last attempt catch all
743SearchEngine    search.         q=
744
745
746# AssignToCountry allows a form of override to force given domains
747# to a specified country. Use the standard 2 letter country codes.
748# Can also use org, com, net and so on, if more appropriate.
749# With judicious use of AllSites, GroupSite and 'whois', this
750# can fairly easily cover the majority of your users with not too much
751# effort.
752
753#AssignToCountry  *.bigpond.com           au
754#AssignToCountry  *.internode.on.net      au
755#AssignToCountry  203.36.*                au
756
757#AssignToCountry  *.ntli.net              uk
758#AssignToCountry  *.btcentralplus.com     uk
759
760
761# The Dump* keywords allow the dumping of Sites, URLs, Referrers,
762# User Agents, User names and Search strings to separate tab delimited
763# text files, suitable for import into most database or spreadsheet
764# programs.
765# DumpPath specifies the path to dump the files.  If not specified,
766# it will default to the current output directory.  Do not use a
767# trailing slash ('/').
768
769#DumpPath       /var/lib/httpd/logs
770
771# The DumpHeader keyword specifies if a header record should be
772# written to the file.  A header record is the first record of the
773# file, and contains the labels for each field written.  Normally,
774# files that are intended to be imported into a database system
775# will not need a header record, while spreadsheets usually do.
776# Value can be either 'yes' or 'no', with 'no' being the default.
777
778#DumpHeader     no
779
780# DumpExtension allows you to specify the dump filename extension
781# to use.  The default is "tab", but some programs are picky about
782# the filenames they use, so you may change it here (for example,
783# some people may prefer to use "csv").
784
785#DumpExtension  tab
786
787# These control the dumping of each individual table.  The value
788# can be either 'yes' or 'no'; the default is 'no'.
789
790#DumpSites      no
791#DumpURLs       no
792#DumpReferrers  no
793#DumpAgents     no
794#DumpUsers      no
795#DumpSearchStr  no
796#DumpEntryPages no
797#DumpExitPages  no
798#DumpCountries  no
799
800
801# IndexMonths controls how much history to display on the front summary
802# page.  The value is specified in months.
803#   eg: Display the last 5 years: 5 x 12 = 60
804# IndexMonths     60
805
806
807# The following Graph????X or Y options are used to modify the sizes of the
808# created charts.
809# The default settings are shown. The defaults are also the minimum settings.
810
811# The main chart on the front page. Summary of all Months.
812#GraphIndexX    512
813#GraphIndexY    256
814
815# The Day by Day Summary graph at the start of each Month's Summary.
816#GraphDailyX  512
817#GraphDailyY  400
818
819# The Hourly Average graph within each Month's Summary.
820#GraphHourlyX    512
821#GraphHourlyY    256
822
823# All pie charts are the same size.
824#GraphPieX      512
825#GraphPieY      300
826
827
828# The custom bar graph and pie chart colors are defined here.
829# Declare them in the standard hexadecimal way (as HTML, but without the '#')
830# If none are given, you will get the standard Webalizer colors.
831
832#ColorHit       00805c
833#ColorFile      0000ff
834#ColorSite      ff8000
835#ColorKbyte     ff0000
836#ColorPage      00c0ff
837#ColorVisit     ffff00
838
839#PieColor1      800080
840#PieColor2      80ffc0
841#PieColor3      ff00ff
842#PieColor4      ffc480
843
844# End of configuration file...  Have a nice day!
845