1<?php
2/***************************************************************************
3*                              functions_search.php
4*                              -------------------
5*     begin                : Wed Sep 05 2001
6*     copyright            : (C) 2002 The phpBB Group
7*     email                : support@phpbb.com
8*
9*     $Id: functions_search.php 5204 2005-09-14 18:14:30Z acydburn $
10*
11****************************************************************************/
12
13/***************************************************************************
14 *
15 *   This program is free software; you can redistribute it and/or modify
16 *   it under the terms of the GNU General Public License as published by
17 *   the Free Software Foundation; either version 2 of the License, or
18 *   (at your option) any later version.
19 *
20 ***************************************************************************/
21
22function clean_words($mode, &$entry, &$stopword_list, &$synonym_list)
23{
24	static $drop_char_match =   array('^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!');
25	static $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '',  '',   ' ', ' ', ' ', ' ', '',  ' ', ' ', '',  ' ',  ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' , ' ', ' ', ' ', ' ',  ' ', ' ');
26
27	$entry = ' ' . strip_tags(strtolower($entry)) . ' ';
28
29	if ( $mode == 'post' )
30	{
31		// Replace line endings by a space
32		$entry = preg_replace('/[\n\r]/is', ' ', $entry);
33		// HTML entities like &nbsp;
34		$entry = preg_replace('/\b&[a-z]+;\b/', ' ', $entry);
35		// Remove URL's
36		$entry = preg_replace('/\b[a-z0-9]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/]+)?/', ' ', $entry);
37		// Quickly remove BBcode.
38		$entry = preg_replace('/\[img:[a-z0-9]{10,}\].*?\[\/img:[a-z0-9]{10,}\]/', ' ', $entry);
39		$entry = preg_replace('/\[\/?url(=.*?)?\]/', ' ', $entry);
40		$entry = preg_replace('/\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]/', ' ', $entry);
41	}
42	else if ( $mode == 'search' )
43	{
44		$entry = str_replace(' +', ' and ', $entry);
45		$entry = str_replace(' -', ' not ', $entry);
46	}
47
48	//
49	// Filter out strange characters like ^, $, &, change "it's" to "its"
50	//
51	for($i = 0; $i < count($drop_char_match); $i++)
52	{
53		$entry =  str_replace($drop_char_match[$i], $drop_char_replace[$i], $entry);
54	}
55
56	if ( $mode == 'post' )
57	{
58		$entry = str_replace('*', ' ', $entry);
59
60		// 'words' that consist of <3 or >20 characters are removed.
61		$entry = preg_replace('/[ ]([\S]{1,2}|[\S]{21,})[ ]/',' ', $entry);
62	}
63
64	if ( !empty($stopword_list) )
65	{
66		for ($j = 0; $j < count($stopword_list); $j++)
67		{
68			$stopword = trim($stopword_list[$j]);
69
70			if ( $mode == 'post' || ( $stopword != 'not' && $stopword != 'and' && $stopword != 'or' ) )
71			{
72				$entry = str_replace(' ' . trim($stopword) . ' ', ' ', $entry);
73			}
74		}
75	}
76
77	if ( !empty($synonym_list) )
78	{
79		for ($j = 0; $j < count($synonym_list); $j++)
80		{
81			list($replace_synonym, $match_synonym) = split(' ', trim(strtolower($synonym_list[$j])));
82			if ( $mode == 'post' || ( $match_synonym != 'not' && $match_synonym != 'and' && $match_synonym != 'or' ) )
83			{
84				$entry =  str_replace(' ' . trim($match_synonym) . ' ', ' ' . trim($replace_synonym) . ' ', $entry);
85			}
86		}
87	}
88
89	return $entry;
90}
91
92function split_words($entry, $mode = 'post')
93{
94	// If you experience problems with the new method, uncomment this block.
95/*
96	$rex = ( $mode == 'post' ) ? "/\b([\w��-�][\w��-�']*[\w��-�]+|[\w��-�]+?)\b/" : '/(\*?[a-z0-9��-�]+\*?)|\b([a-z0-9��-�]+)\b/';
97	preg_match_all($rex, $entry, $split_entries);
98
99	return $split_entries[1];
100*/
101	// Trim 1+ spaces to one space and split this trimmed string into words.
102	return explode(' ', trim(preg_replace('#\s+#', ' ', $entry)));
103}
104
105function add_search_words($mode, $post_id, $post_text, $post_title = '')
106{
107	global $db, $phpbb_root_path, $board_config, $lang;
108
109	$stopword_array = @file($phpbb_root_path . 'language/lang_' . $board_config['default_lang'] . "/search_stopwords.txt");
110	$synonym_array = @file($phpbb_root_path . 'language/lang_' . $board_config['default_lang'] . "/search_synonyms.txt");
111
112	$search_raw_words = array();
113	$search_raw_words['text'] = split_words(clean_words('post', $post_text, $stopword_array, $synonym_array));
114	$search_raw_words['title'] = split_words(clean_words('post', $post_title, $stopword_array, $synonym_array));
115
116	@set_time_limit(0);
117
118	$word = array();
119	$word_insert_sql = array();
120	while ( list($word_in, $search_matches) = @each($search_raw_words) )
121	{
122		$word_insert_sql[$word_in] = '';
123		if ( !empty($search_matches) )
124		{
125			for ($i = 0; $i < count($search_matches); $i++)
126			{
127				$search_matches[$i] = trim($search_matches[$i]);
128
129				if( $search_matches[$i] != '' )
130				{
131					$word[] = $search_matches[$i];
132					if ( !strstr($word_insert_sql[$word_in], "'" . $search_matches[$i] . "'") )
133					{
134						$word_insert_sql[$word_in] .= ( $word_insert_sql[$word_in] != "" ) ? ", '" . $search_matches[$i] . "'" : "'" . $search_matches[$i] . "'";
135					}
136				}
137			}
138		}
139	}
140
141	if ( count($word) )
142	{
143		sort($word);
144
145		$prev_word = '';
146		$word_text_sql = '';
147		$temp_word = array();
148		for($i = 0; $i < count($word); $i++)
149		{
150			if ( $word[$i] != $prev_word )
151			{
152				$temp_word[] = $word[$i];
153				$word_text_sql .= ( ( $word_text_sql != '' ) ? ', ' : '' ) . "'" . $word[$i] . "'";
154			}
155			$prev_word = $word[$i];
156		}
157		$word = $temp_word;
158
159		$check_words = array();
160		switch( SQL_LAYER )
161		{
162			case 'postgresql':
163			case 'msaccess':
164			case 'mssql-odbc':
165			case 'oracle':
166			case 'db2':
167				$sql = "SELECT word_id, word_text
168					FROM " . SEARCH_WORD_TABLE . "
169					WHERE word_text IN ($word_text_sql)";
170				if ( !($result = $db->sql_query($sql)) )
171				{
172					message_die(GENERAL_ERROR, 'Could not select words', '', __LINE__, __FILE__, $sql);
173				}
174
175				while ( $row = $db->sql_fetchrow($result) )
176				{
177					$check_words[$row['word_text']] = $row['word_id'];
178				}
179				break;
180		}
181
182		$value_sql = '';
183		$match_word = array();
184		for ($i = 0; $i < count($word); $i++)
185		{
186			$new_match = true;
187			if ( isset($check_words[$word[$i]]) )
188			{
189				$new_match = false;
190			}
191
192			if ( $new_match )
193			{
194				switch( SQL_LAYER )
195				{
196					case 'mysql':
197					case 'mysql4':
198						$value_sql .= ( ( $value_sql != '' ) ? ', ' : '' ) . '(\'' . $word[$i] . '\', 0)';
199						break;
200					case 'mssql':
201					case 'mssql-odbc':
202						$value_sql .= ( ( $value_sql != '' ) ? ' UNION ALL ' : '' ) . "SELECT '" . $word[$i] . "', 0";
203						break;
204					default:
205						$sql = "INSERT INTO " . SEARCH_WORD_TABLE . " (word_text, word_common)
206							VALUES ('" . $word[$i] . "', 0)";
207						if( !$db->sql_query($sql) )
208						{
209							message_die(GENERAL_ERROR, 'Could not insert new word', '', __LINE__, __FILE__, $sql);
210						}
211						break;
212				}
213			}
214		}
215
216		if ( $value_sql != '' )
217		{
218			switch ( SQL_LAYER )
219			{
220				case 'mysql':
221				case 'mysql4':
222					$sql = "INSERT IGNORE INTO " . SEARCH_WORD_TABLE . " (word_text, word_common)
223						VALUES $value_sql";
224					break;
225				case 'mssql':
226				case 'mssql-odbc':
227					$sql = "INSERT INTO " . SEARCH_WORD_TABLE . " (word_text, word_common)
228						$value_sql";
229					break;
230			}
231
232			if ( !$db->sql_query($sql) )
233			{
234				message_die(GENERAL_ERROR, 'Could not insert new word', '', __LINE__, __FILE__, $sql);
235			}
236		}
237	}
238
239	while( list($word_in, $match_sql) = @each($word_insert_sql) )
240	{
241		$title_match = ( $word_in == 'title' ) ? 1 : 0;
242
243		if ( $match_sql != '' )
244		{
245			$sql = "INSERT INTO " . SEARCH_MATCH_TABLE . " (post_id, word_id, title_match)
246				SELECT $post_id, word_id, $title_match
247					FROM " . SEARCH_WORD_TABLE . "
248					WHERE word_text IN ($match_sql)";
249			if ( !$db->sql_query($sql) )
250			{
251				message_die(GENERAL_ERROR, 'Could not insert new word matches', '', __LINE__, __FILE__, $sql);
252			}
253		}
254	}
255
256	if ($mode == 'single')
257	{
258		remove_common('single', 4/10, $word);
259	}
260
261	return;
262}
263
264//
265// Check if specified words are too common now
266//
267function remove_common($mode, $fraction, $word_id_list = array())
268{
269	global $db;
270
271	$sql = "SELECT COUNT(post_id) AS total_posts
272		FROM " . POSTS_TABLE;
273	if ( !($result = $db->sql_query($sql)) )
274	{
275		message_die(GENERAL_ERROR, 'Could not obtain post count', '', __LINE__, __FILE__, $sql);
276	}
277
278	$row = $db->sql_fetchrow($result);
279
280	if ( $row['total_posts'] >= 100 )
281	{
282		$common_threshold = floor($row['total_posts'] * $fraction);
283
284		if ( $mode == 'single' && count($word_id_list) )
285		{
286			$word_id_sql = '';
287			for($i = 0; $i < count($word_id_list); $i++)
288			{
289				$word_id_sql .= ( ( $word_id_sql != '' ) ? ', ' : '' ) . "'" . $word_id_list[$i] . "'";
290			}
291
292			$sql = "SELECT m.word_id
293				FROM " . SEARCH_MATCH_TABLE . " m, " . SEARCH_WORD_TABLE . " w
294				WHERE w.word_text IN ($word_id_sql)
295					AND m.word_id = w.word_id
296				GROUP BY m.word_id
297				HAVING COUNT(m.word_id) > $common_threshold";
298		}
299		else
300		{
301			$sql = "SELECT word_id
302				FROM " . SEARCH_MATCH_TABLE . "
303				GROUP BY word_id
304				HAVING COUNT(word_id) > $common_threshold";
305		}
306
307		if ( !($result = $db->sql_query($sql)) )
308		{
309			message_die(GENERAL_ERROR, 'Could not obtain common word list', '', __LINE__, __FILE__, $sql);
310		}
311
312		$common_word_id = '';
313		while ( $row = $db->sql_fetchrow($result) )
314		{
315			$common_word_id .= ( ( $common_word_id != '' ) ? ', ' : '' ) . $row['word_id'];
316		}
317		$db->sql_freeresult($result);
318
319		if ( $common_word_id != '' )
320		{
321			$sql = "UPDATE " . SEARCH_WORD_TABLE . "
322				SET word_common = " . TRUE . "
323				WHERE word_id IN ($common_word_id)";
324			if ( !$db->sql_query($sql) )
325			{
326				message_die(GENERAL_ERROR, 'Could not delete word list entry', '', __LINE__, __FILE__, $sql);
327			}
328
329			$sql = "DELETE FROM " . SEARCH_MATCH_TABLE . "
330				WHERE word_id IN ($common_word_id)";
331			if ( !$db->sql_query($sql) )
332			{
333				message_die(GENERAL_ERROR, 'Could not delete word match entry', '', __LINE__, __FILE__, $sql);
334			}
335		}
336	}
337
338	return;
339}
340
341function remove_search_post($post_id_sql)
342{
343	global $db;
344
345	$words_removed = false;
346
347	switch ( SQL_LAYER )
348	{
349		case 'mysql':
350		case 'mysql4':
351			$sql = "SELECT word_id
352				FROM " . SEARCH_MATCH_TABLE . "
353				WHERE post_id IN ($post_id_sql)
354				GROUP BY word_id";
355			if ( $result = $db->sql_query($sql) )
356			{
357				$word_id_sql = '';
358				while ( $row = $db->sql_fetchrow($result) )
359				{
360					$word_id_sql .= ( $word_id_sql != '' ) ? ', ' . $row['word_id'] : $row['word_id'];
361				}
362
363				$sql = "SELECT word_id
364					FROM " . SEARCH_MATCH_TABLE . "
365					WHERE word_id IN ($word_id_sql)
366					GROUP BY word_id
367					HAVING COUNT(word_id) = 1";
368				if ( $result = $db->sql_query($sql) )
369				{
370					$word_id_sql = '';
371					while ( $row = $db->sql_fetchrow($result) )
372					{
373						$word_id_sql .= ( $word_id_sql != '' ) ? ', ' . $row['word_id'] : $row['word_id'];
374					}
375
376					if ( $word_id_sql != '' )
377					{
378						$sql = "DELETE FROM " . SEARCH_WORD_TABLE . "
379							WHERE word_id IN ($word_id_sql)";
380						if ( !$db->sql_query($sql) )
381						{
382							message_die(GENERAL_ERROR, 'Could not delete word list entry', '', __LINE__, __FILE__, $sql);
383						}
384
385						$words_removed = $db->sql_affectedrows();
386					}
387				}
388			}
389			break;
390
391		default:
392			$sql = "DELETE FROM " . SEARCH_WORD_TABLE . "
393				WHERE word_id IN (
394					SELECT word_id
395					FROM " . SEARCH_MATCH_TABLE . "
396					WHERE word_id IN (
397						SELECT word_id
398						FROM " . SEARCH_MATCH_TABLE . "
399						WHERE post_id IN ($post_id_sql)
400						GROUP BY word_id
401					)
402					GROUP BY word_id
403					HAVING COUNT(word_id) = 1
404				)";
405			if ( !$db->sql_query($sql) )
406			{
407				message_die(GENERAL_ERROR, 'Could not delete old words from word table', '', __LINE__, __FILE__, $sql);
408			}
409
410			$words_removed = $db->sql_affectedrows();
411
412			break;
413	}
414
415	$sql = "DELETE FROM " . SEARCH_MATCH_TABLE . "
416		WHERE post_id IN ($post_id_sql)";
417	if ( !$db->sql_query($sql) )
418	{
419		message_die(GENERAL_ERROR, 'Error in deleting post', '', __LINE__, __FILE__, $sql);
420	}
421
422	return $words_removed;
423}
424
425//
426// Username search
427//
428function username_search($search_match)
429{
430	global $db, $board_config, $template, $lang, $images, $theme, $phpEx, $phpbb_root_path;
431	global $starttime, $gen_simple_header;
432
433	$gen_simple_header = TRUE;
434
435	$username_list = '';
436	if ( !empty($search_match) )
437	{
438		$username_search = preg_replace('/\*/', '%', phpbb_clean_username($search_match));
439
440		$sql = "SELECT username
441			FROM " . USERS_TABLE . "
442			WHERE username LIKE '" . str_replace("\'", "''", $username_search) . "' AND user_id <> " . ANONYMOUS . "
443			ORDER BY username";
444		if ( !($result = $db->sql_query($sql)) )
445		{
446			message_die(GENERAL_ERROR, 'Could not obtain search results', '', __LINE__, __FILE__, $sql);
447		}
448
449		if ( $row = $db->sql_fetchrow($result) )
450		{
451			do
452			{
453				$username_list .= '<option value="' . $row['username'] . '">' . $row['username'] . '</option>';
454			}
455			while ( $row = $db->sql_fetchrow($result) );
456		}
457		else
458		{
459			$username_list .= '<option>' . $lang['No_match']. '</option>';
460		}
461		$db->sql_freeresult($result);
462	}
463
464	$page_title = $lang['Search'];
465	include($phpbb_root_path . 'includes/page_header.'.$phpEx);
466
467	$template->set_filenames(array(
468		'search_user_body' => 'search_username.tpl')
469	);
470
471	$template->assign_vars(array(
472		'USERNAME' => (!empty($search_match)) ? phpbb_clean_username($search_match) : '',
473
474		'L_CLOSE_WINDOW' => $lang['Close_window'],
475		'L_SEARCH_USERNAME' => $lang['Find_username'],
476		'L_UPDATE_USERNAME' => $lang['Select_username'],
477		'L_SELECT' => $lang['Select'],
478		'L_SEARCH' => $lang['Search'],
479		'L_SEARCH_EXPLAIN' => $lang['Search_author_explain'],
480		'L_CLOSE_WINDOW' => $lang['Close_window'],
481
482		'S_USERNAME_OPTIONS' => $username_list,
483		'S_SEARCH_ACTION' => append_sid("search.$phpEx?mode=searchuser"))
484	);
485
486	if ( $username_list != '' )
487	{
488		$template->assign_block_vars('switch_select_name', array());
489	}
490
491	$template->pparse('search_user_body');
492
493	include($phpbb_root_path . 'includes/page_tail.'.$phpEx);
494
495	return;
496}
497
498?>