1<?php if (!defined('PmWiki')) exit();
2/*  Copyright 2006-2021 Patrick R. Michaud (pmichaud@pobox.com)
3    This file is part of PmWiki; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published
5    by the Free Software Foundation; either version 2 of the License, or
6    (at your option) any later version.  See pmwiki.php for full details.
7
8    This script adds blocklisting capabilities to PmWiki, and can
9    be enabled by simply setting the following in local/config.php:
10
11        $EnableBlocklist = 1;
12
13    With $EnableBlocklist set to 1, this module will search through
14    the SiteAdmin.Blocklist page, as well as any other pages given by
    the $BlocklistPages variable, looking for lines of the
16    form "block:some phrase" or "block:/regex/", with "some phrase"
17    and "/regex/" indicating things to be excluded from any
18    posting to the site.
19
20    In addition, if a page contains IP addresses of the form
21    "a.b.c.d" or "a.b.c.*", then any posts coming from hosts
22    matching the address will be blocked.
23
24    There is also an "unblock:..." form, which removes an entry
25    from the blocklist.  This is useful for removing specific
26    block items in wikifarms and with automatically downloaded
27    blocklists (below).
28
29    The script also has the capability of automatically downloading
    blocklists from other sources, such as chongqed.org and
    the MoinMaster blocklist.  These are configured using
32    the $BlocklistDownload array.  An $EnableBlocklist value
33    of at least 10 configures PmWiki to automatically download
34    these external blocklists and refresh them daily.
35
36    More information about blocklists is available in the
37    PmWiki.Blocklist page.
38
39    Script maintained by Petko YOTOV www.pmwiki.org/petko
40*/
41
42
##   Some recipes do page updates outside of the built-in posting
##   cycle, so $EnableBlocklistImmediate is used to determine if
##   we need to catch these.  Currently this defaults to enabled,
##   but at some point we may change the default to disabled.
##   When the current $action is listed in $BlocklistActions
##   ('comment' by default), everything in $_POST is checked right
##   away, and if posting ends up disabled the post/postattr/postedit
##   request flags are removed from $_POST.
if (IsEnabled($EnableBlocklistImmediate, 1)) {
  SDVA($BlocklistActions, array('comment' => 1));
  ##  Collect every submitted value in order, descending into
  ##  array-valued fields (e.g. "name[]") so that their contents are
  ##  checked too instead of being reduced to the literal string "Array".
  $ptext = array();
  $BlocklistPostQueue = array_values((array)@$_POST);
  while ($BlocklistPostQueue) {
    $BlocklistPostValue = array_shift($BlocklistPostQueue);
    if (is_array($BlocklistPostValue))
      $BlocklistPostQueue = array_merge(array_values($BlocklistPostValue),
                                        $BlocklistPostQueue);
    else $ptext[] = $BlocklistPostValue;
  }
  ##  this runs in the global scope, so don't leave the temporaries behind
  unset($BlocklistPostQueue, $BlocklistPostValue);
  $ptext = implode(' ', $ptext);
  if ($ptext && @$BlocklistActions[$action]) {
    Blocklist($pagename, $ptext);
    if (!$EnablePost) {
      unset($_POST['post']);
      unset($_POST['postattr']);
      unset($_POST['postedit']);
    }
  }
}
59
60
##   An $EnableBlocklist value of 10 or more registers the MoinMaster
##   list as a downloadable blocklist page in $BlocklistDownload.  The
##   values are only defaults (set via SDVA).
if ($EnableBlocklist >= 10) {
  SDVA(
    $BlocklistDownload['SiteAdmin.Blocklist-MoinMaster'],
    array(
      'url' => 'http://moinmo.in/BadContent?action=raw',
      'format' => 'regex',
    )
  );
}
68
69
##   CheckBlocklist is inserted at the front of $EditFunctions, to
##   automatically check for blocks on anything being posted through
##   the normal "update a page" cycle.
##     $pagename - name of the page being edited
##     $page     - unused here; part of the edit-function signature
##     $new      - unused here; part of the edit-function signature
##   All values in $_POST are joined into one string and handed to
##   Blocklist(), which records its verdict in global variables.
array_unshift($EditFunctions, 'CheckBlocklist');
function CheckBlocklist($pagename, &$page, &$new) {
  StopWatch("CheckBlocklist: begin $pagename");
  ##  Gather the submitted values in order, descending into array-valued
  ##  fields (e.g. "name[]") so their contents are checked as well,
  ##  rather than being converted to the literal string "Array".
  $fields = array();
  $queue = array_values((array)@$_POST);
  while ($queue) {
    $value = array_shift($queue);
    if (is_array($value))
      $queue = array_merge(array_values($value), $queue);
    else $fields[] = $value;
  }
  $ptext = implode(' ', $fields);
  if ($ptext) Blocklist($pagename, $ptext);
  StopWatch("CheckBlocklist: end $pagename");
}
80
81
##   Blocklist is the function that does all of the work of
##   checking for reasons to block a posting.  It reads
##   the available blocklist pages ($BlocklistPages) and
##   builds an array of strings and regular expressions to
##   be checked against $text; if any are found, then
##   posting is blocked (via $EnablePost=0 and $IsBlocked=1).
##   The function also checks the REMOTE_ADDR against any blocked
##   IP addresses.
##     $pagename - page being posted to; the function returns at once
##                 when the loop reaches a blocklist page of that name
##     $text     - the submitted text to test against the block terms
##   Nothing is returned; the outcome is reported through the globals
##   $EnablePost, $IsBlocked, $WhyBlockedFmt and $MessagesFmt.
function Blocklist($pagename, $text) {
  global $BlocklistPages, $BlockedMessagesFmt, $BlocklistDownload,
    $BlocklistDownloadRefresh, $Now, $EnablePost, $WhyBlockedFmt,
    $MessagesFmt, $BlocklistMessageFmt, $EnableWhyBlocked, $IsBlocked;

  StopWatch("Blocklist: begin $pagename");

  $BlocklistDownload = (array)@$BlocklistDownload;
  SDV($BlocklistPages,
    array_merge(array('$SiteAdminGroup.Blocklist',
                      '$SiteAdminGroup.Blocklist-Farm'),
                array_keys($BlocklistDownload)));
  SDV($BlocklistMessageFmt, "<h3 class='wikimessage'>$[This post has been blocked by the administrator]</h3>");
  SDVA($BlockedMessagesFmt, array(
    'ip' => '$[Address blocked from posting]: ',
    'text' => '$[Text blocked from posting]: '));
  SDV($BlocklistDownloadRefresh, 86400);

  ##  terms collected from all blocklist pages
  $terms = array('block' => array(), 'unblock' => array());

  ##  The author's address is the same for every page, so build the
  ##  a.b.c.d / a.b.c.* pattern once.  Without a remote address the
  ##  pattern would degenerate to a bare word boundary and match any
  ##  page, so in that case the IP check is skipped entirely.
  $ippat = '';
  if (isset($_SERVER['REMOTE_ADDR']) && $_SERVER['REMOTE_ADDR'] != '') {
    $ip = preg_quote($_SERVER['REMOTE_ADDR']);
    $ip = preg_replace('/\\d+$/', '($0\\b|\\*)', $ip);
    $ippat = "/\\b$ip/";
  }

  ##  Loop over all blocklist pages
  foreach((array)$BlocklistPages as $b) {

    ##  load the current blocklist page
    $pn = FmtPageName($b, $pagename);
    $page = ReadPage($pn, READPAGE_CURRENT);
    if (!$page) continue;

    ##  if the page being checked is a blocklist page, stop blocking
    if ($pagename == $pn) return;

    $pagetext = isset($page['text']) ? (string)$page['text'] : '';

    ##  If the blocklist page is managed by automatic download,
    ##  schedule any new downloads here.  The download itself runs
    ##  at shutdown and is given the current working directory.
    if (@$BlocklistDownload[$pn]) {
      $bd = &$BlocklistDownload[$pn];
      SDVA($bd, array(
        'refresh' => $BlocklistDownloadRefresh,
        'url' => "http://www.pmwiki.org/blocklists/$pn" ));
      if (!$pagetext || $page['time'] < $Now - $bd['refresh'])
        register_shutdown_function('BlocklistDownload', $pn, getcwd());
    }

    ##  If the blocklist is simply a list of regexes to be matched, load
    ##  them into $terms['block'] and continue to the next blocklist page.
    ##  Some regexes from remote sites aren't well-formed, so we have
    ##  to escape any slashes that aren't already escaped.
    if (strpos($pagetext, 'blocklist-format: regex') !== false) {
      if (preg_match_all('/^([^\\s#].+)/m', $pagetext, $match))
        foreach($match[0] as $m) {
          $m = preg_replace('#(?<!\\\\)/#', '\\/', trim($m));
          $terms['block'][] = "/$m/";
        }
      continue;
    }

    ##  Treat the page as a pmwiki-format blocklist page, with
    ##  IP addresses and "block:"-style declarations.  First, see
    ##  if we need to block the author based on a.b.c.d or a.b.c.*
    ##  IP addresses.
    if ($ippat && preg_match($ippat, $pagetext, $match)) {
      $EnablePost = 0;
      $IsBlocked = 1;
      $WhyBlockedFmt[] = $BlockedMessagesFmt['ip'] . $match[0];
    }

    ##  Now we'll load any "block:" or "unblock:" specifications
    ##  from the page text.  A declaration with nothing after the
    ##  colon is ignored: an empty term is contained in every string,
    ##  so keeping it could block every post.
    if (preg_match_all('/(un)?(?:block|regex):(.*)/', $pagetext,
                       $match, PREG_SET_ORDER))
      foreach($match as $m) {
        $term = trim($m[2]);
        if ($term === '') continue;
        $terms[$m[1].'block'][] = $term;
      }
  }

  ##  okay, we've loaded all of the terms, now subtract any 'unblock'
  ##  terms from the block set.
  StopWatch("Blocklist: diff unblock");
  $blockterms = array_diff($terms['block'], $terms['unblock']);

  ##  go through each of the remaining blockterms and see if it matches the
  ##  text -- if so, disable posting and add a message to $WhyBlockedFmt.
  ##  Terms starting with "/" are used as regular expressions as written;
  ##  all other terms are matched as case-insensitive substrings.
  StopWatch('Blocklist: blockterms (count='.count($blockterms).')');
  $itext = strtolower($text);
  foreach($blockterms as $b) {
    if ($b[0] == '/') {
      if (!preg_match($b, $text)) continue;
    } else if (strpos($itext, strtolower($b)) === false) continue;
    $EnablePost = 0;
    $IsBlocked = 1;
    $WhyBlockedFmt[] = $BlockedMessagesFmt['text'] . $b;
  }
  StopWatch('Blocklist: blockterms done');

  ##  If we came across any reasons to block, let's provide a message
  ##  to the author that it was blocked.  If $EnableWhyBlocked is set,
  ##  we'll even tell the author why.  :-)
  if (@$WhyBlockedFmt) {
    $MessagesFmt[] = $BlocklistMessageFmt;
    if (IsEnabled($EnableWhyBlocked, 0))
      foreach((array)$WhyBlockedFmt as $why)
        $MessagesFmt[] = "<pre class='blocklistmessage'>$why</pre>\n";
  }
  StopWatch("Blocklist: end $pagename");
}
191
192
##   BlocklistDownload() retrieves a blocklist from the url configured
##   in $BlocklistDownload[$pagename] and stores it in the page
##   $pagename, wrapped in the $BlocklistDownloadFmt template.
##     $pagename - the blocklist page to refresh
##     $dir      - if non-empty, output is flushed and this directory
##                 becomes the working directory before anything else
##                 (Blocklist() passes the getcwd() value captured when
##                 it schedules the download)
##   When nothing could be downloaded, an existing page text is kept
##   as it is; a page without text receives a short failure note.
function BlocklistDownload($pagename, $dir = '') {
  global $BlocklistDownloadFmt, $BlocklistDownload, $FmtV;

  if ($dir) {
    flush();
    chdir($dir);
  }

  ##  default template for the generated page
  SDV($BlocklistDownloadFmt, "
  [@
## blocklist-note:   NOTE: This page is automatically generated by blocklist.php
## blocklist-note:   NOTE: Any edits to this page may be lost!
## blocklist-url:    \$BlocklistDownloadUrl
## blocklist-when:   \$CurrentTimeISO
#  blocklist-format: \$BlocklistFormat
\$BlocklistData
  @]
");

  ##  download settings for this page, and the page as currently stored
  $bd = &$BlocklistDownload[$pagename];
  $page = ReadPage($pagename, READPAGE_CURRENT);

  ##  fetch the remote list; failures are silenced and handled below
  $fetched = @file($bd['url']);

  ##  nothing fetched and nothing stored yet: record what went wrong
  ##  in the page itself
  if (!$fetched && !@$page['text']) {
    $auf = ini_get('allow_url_fopen');
    $fetched = "#### Unable to download blocklist (allow_url_fopen=$auf)";
  }

  ##  new content (downloaded lines or the failure note) is stripped of
  ##  any "##blocklist..." lines and placed into the template
  if ($fetched) {
    $listing = implode('', (array)$fetched);
    $FmtV['$BlocklistDownloadUrl'] = $bd['url'];
    $FmtV['$BlocklistFormat'] = $bd['format'];
    $FmtV['$BlocklistData'] = preg_replace('/^##blocklist.*/m', '', $listing);
    $page['text'] = FmtPageName($BlocklistDownloadFmt, $pagename);
    SDV($page['passwdread'], '@lock');
  }

  ##  the page is written back in every case, changed or not
  ##  NOTE(review): presumably this refreshes the page's time so that a
  ##  failed download is not retried on every request -- confirm
  ##  against WritePage()
  WritePage($pagename, $page);
}
240