1 //
2 // $Id: sphinxutils.cpp 4113 2013-08-26 07:43:28Z deogar $
3 //
4 
5 //
6 // Copyright (c) 2001-2013, Andrew Aksyonoff
7 // Copyright (c) 2008-2013, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15 
16 /// @file sphinxutils.cpp
17 /// Implementations for Sphinx utilities shared classes.
18 
19 #include "sphinx.h"
20 #include "sphinxutils.h"
21 #include <ctype.h>
22 #include <fcntl.h>
23 #include <errno.h>
24 #if HAVE_EXECINFO_H
25 #include <execinfo.h>
26 #endif
27 
28 #if USE_WINDOWS
29 #include <io.h> // for ::open on windows
30 #include <dbghelp.h>
31 #pragma comment(linker, "/defaultlib:dbghelp.lib")
32 #pragma message("Automatically linking with dbghelp.lib")
33 #else
34 #include <sys/wait.h>
35 #include <signal.h>
36 #endif
37 
38 /////////////////////////////////////////////////////////////////////////////
39 
/// Skip leading whitespace.
/// @param sLine	NUL-terminated string; it is not modified
/// @return pointer into sLine at its first non-whitespace character
///			(or at the terminating NUL if there is none)
static char * ltrim ( char * sLine )
{
	// isspace() is only defined for values representable as unsigned char (or EOF);
	// a plain char holding a byte >=0x80 may be negative, hence the cast
	while ( *sLine && isspace ( (unsigned char)*sLine ) )
		sLine++;
	return sLine;
}
46 
47 
/// Cut trailing whitespace off in place.
/// @param sLine	NUL-terminated, writable string
/// @return sLine itself, now terminated right after its last non-whitespace character
static char * rtrim ( char * sLine )
{
	// walk a length rather than a pointer, so that an empty or all-whitespace
	// string never makes us form a pointer in front of the buffer
	size_t iLen = strlen ( sLine );

	// cast because isspace() is undefined for negative (non-EOF) arguments
	while ( iLen>0 && isspace ( (unsigned char)sLine[iLen-1] ) )
		iLen--;

	sLine[iLen] = '\0';
	return sLine;
}
56 
57 
trim(char * sLine)58 static char * trim ( char * sLine )
59 {
60 	return ltrim ( rtrim ( sLine ) );
61 }
62 
63 //////////////////////////////////////////////////////////////////////////
64 
GetSize(const char * sKey,int iDefault) const65 int CSphConfigSection::GetSize ( const char * sKey, int iDefault ) const
66 {
67 	CSphVariant * pEntry = (*this)( sKey );
68 	if ( !pEntry )
69 		return iDefault;
70 
71 	char sMemLimit[256];
72 	strncpy ( sMemLimit, pEntry->cstr(), sizeof(sMemLimit) );
73 	sMemLimit [ sizeof(sMemLimit)-1 ] = '\0';
74 
75 	int iLen = strlen ( sMemLimit );
76 	if ( !iLen )
77 		return iDefault;
78 
79 	iLen--;
80 	int iScale = 1;
81 	if ( toupper ( sMemLimit[iLen] )=='K' )
82 	{
83 		iScale = 1024;
84 		sMemLimit[iLen] = '\0';
85 
86 	} else if ( toupper ( sMemLimit[iLen] )=='M' )
87 	{
88 		iScale = 1048576;
89 		sMemLimit[iLen] = '\0';
90 	}
91 
92 	char * sErr;
93 	int64_t iRes = strtoll ( sMemLimit, &sErr, 10 );
94 
95 	if ( !*sErr )
96 	{
97 		iRes *= iScale;
98 		if ( iRes>INT_MAX )
99 		{
100 			sphWarning ( "'%s = %s' clamped to INT_MAX", sKey, pEntry->cstr() );
101 			iRes = INT_MAX;
102 		}
103 	} else
104 	{
105 		sphWarning ( "'%s = %s' parse error '%s'", sKey, pEntry->cstr(), sErr );
106 		iRes = iDefault;
107 	}
108 
109 	return (int)iRes;
110 }
111 
112 //////////////////////////////////////////////////////////////////////////
113 // CONFIG PARSER
114 //////////////////////////////////////////////////////////////////////////
115 
/// key flags
/// (bits that get combined in KeyDesc_t::m_iFlags and tested by CSphConfigParser::ValidateKey)
enum
{
	KEY_DEPRECATED		= 1UL<<0,	///< key is still accepted, but using it prints a deprecation warning
	KEY_LIST			= 1UL<<1	///< key may repeat within a section; repeating a key without this flag prints a warning
};
122 
/// key descriptor for validation purposes
/// (the tables built from these end with an entry that has a NULL m_sKey)
struct KeyDesc_t
{
	const char *		m_sKey;		///< key name
	int					m_iFlags;	///< flags (a combination of KEY_xxx bits, or 0)
	const char *		m_sExtra;	///< extra stuff (for now, the name that the deprecation warning suggests using instead; may be NULL)
};
130 
/// allowed keys for source section
/// (CSphConfigParser::ValidateKey looks keys up here when the section type is "source")
static KeyDesc_t g_dKeysSource[] =
{
	{ "type",					0, NULL },
	{ "strip_html",				KEY_DEPRECATED, "html_strip (per-index)" },
	{ "index_html_attrs",		KEY_DEPRECATED, "html_index_attrs (per-index)" },
	{ "sql_host",				0, NULL },
	{ "sql_user",				0, NULL },
	{ "sql_pass",				0, NULL },
	{ "sql_db",					0, NULL },
	{ "sql_port",				0, NULL },
	{ "sql_sock",				0, NULL },
	{ "mysql_connect_flags",	0, NULL },
	{ "mysql_ssl_key",			0, NULL },
	{ "mysql_ssl_cert",			0, NULL },
	{ "mysql_ssl_ca",			0, NULL },
	{ "mssql_winauth",			0, NULL },
	{ "mssql_unicode",			0, NULL },
	{ "sql_query_pre",			KEY_LIST, NULL },
	{ "sql_query",				0, NULL },
	{ "sql_query_range",		0, NULL },
	{ "sql_range_step",			0, NULL },
	{ "sql_query_killlist",		0, NULL },
	{ "sql_attr_uint",			KEY_LIST, NULL },
	{ "sql_attr_bool",			KEY_LIST, NULL },
	{ "sql_attr_timestamp",		KEY_LIST, NULL },
	{ "sql_attr_str2ordinal",	KEY_LIST, NULL },
	{ "sql_attr_float",			KEY_LIST, NULL },
	{ "sql_attr_bigint",		KEY_LIST, NULL },
	{ "sql_attr_multi",			KEY_LIST, NULL },
	{ "sql_query_post",			KEY_LIST, NULL },
	{ "sql_query_post_index",	KEY_LIST, NULL },
	{ "sql_ranged_throttle",	0, NULL },
	{ "sql_query_info",			0, NULL },
	{ "xmlpipe_command",		0, NULL },
	{ "xmlpipe_field",			KEY_LIST, NULL },
	{ "xmlpipe_attr_uint",		KEY_LIST, NULL },
	{ "xmlpipe_attr_timestamp",	KEY_LIST, NULL },
	{ "xmlpipe_attr_str2ordinal",	KEY_LIST, NULL },
	{ "xmlpipe_attr_bool",		KEY_LIST, NULL },
	{ "xmlpipe_attr_float",		KEY_LIST, NULL },
	{ "xmlpipe_attr_bigint",	KEY_LIST, NULL },
	{ "xmlpipe_attr_multi",		KEY_LIST, NULL },
	{ "xmlpipe_attr_multi_64",	KEY_LIST, NULL },
	{ "xmlpipe_attr_string",	KEY_LIST, NULL },
	{ "xmlpipe_attr_wordcount",	KEY_LIST, NULL },
	{ "xmlpipe_field_string",	KEY_LIST, NULL },
	{ "xmlpipe_field_wordcount",	KEY_LIST, NULL },
	{ "xmlpipe_fixup_utf8",		0, NULL },
	{ "sql_group_column",		KEY_LIST | KEY_DEPRECATED, "sql_attr_uint" },
	{ "sql_date_column",		KEY_LIST | KEY_DEPRECATED, "sql_attr_timestamp" },
	{ "sql_str2ordinal_column",	KEY_LIST | KEY_DEPRECATED, "sql_attr_str2ordinal" },
	{ "unpack_zlib",			KEY_LIST, NULL },
	{ "unpack_mysqlcompress",	KEY_LIST, NULL },
	{ "unpack_mysqlcompress_maxsize", 0, NULL },
	{ "odbc_dsn",				0, NULL },
	{ "sql_joined_field",		KEY_LIST, NULL },
	{ "sql_attr_string",		KEY_LIST, NULL },
	{ "sql_attr_str2wordcount",	KEY_LIST, NULL },
	{ "sql_field_string",		KEY_LIST, NULL },
	{ "sql_field_str2wordcount",	KEY_LIST, NULL },
	{ "sql_file_field",			KEY_LIST, NULL },
	{ "sql_column_buffers",		0, NULL },
	{ NULL,						0, NULL }	// end-of-table marker; the key lookup loop stops on the NULL name
};
196 
/// allowed keys for index section
/// (CSphConfigParser::ValidateKey looks keys up here when the section type is "index")
static KeyDesc_t g_dKeysIndex[] =
{
	{ "source",					KEY_LIST, NULL },
	{ "path",					0, NULL },
	{ "docinfo",				0, NULL },
	{ "mlock",					0, NULL },
	{ "morphology",				0, NULL },
	{ "stopwords",				0, NULL },
	{ "synonyms",				KEY_DEPRECATED, "exceptions" },
	{ "exceptions",				0, NULL },
	{ "wordforms",				0, NULL },
	{ "min_word_len",			0, NULL },
	{ "charset_type",			0, NULL },
	{ "charset_table",			0, NULL },
	{ "ignore_chars",			0, NULL },
	{ "min_prefix_len",			0, NULL },
	{ "min_infix_len",			0, NULL },
	{ "prefix_fields",			0, NULL },
	{ "infix_fields",			0, NULL },
	{ "enable_star",			0, NULL },
	{ "ngram_len",				0, NULL },
	{ "ngram_chars",			0, NULL },
	{ "phrase_boundary",		0, NULL },
	{ "phrase_boundary_step",	0, NULL },
	{ "ondisk_dict",			0, NULL },
	{ "type",					0, NULL },
	{ "local",					KEY_LIST, NULL },
	{ "agent",					KEY_LIST, NULL },
	{ "agent_blackhole",		KEY_LIST, NULL },
	{ "agent_connect_timeout",	0, NULL },
	{ "agent_query_timeout",	0, NULL },
	{ "html_strip",				0, NULL },
	{ "html_index_attrs",		0, NULL },
	{ "html_remove_elements",	0, NULL },
	{ "preopen",				0, NULL },
	{ "inplace_enable",			0, NULL },
	{ "inplace_hit_gap",		0, NULL },
	{ "inplace_docinfo_gap",	0, NULL },
	{ "inplace_reloc_factor",	0, NULL },
	{ "inplace_write_factor",	0, NULL },
	{ "index_exact_words",		0, NULL },
	{ "min_stemming_len",		0, NULL },
	{ "overshort_step",			0, NULL },
	{ "stopword_step",			0, NULL },
	{ "blend_chars",			0, NULL },
	{ "expand_keywords",		0, NULL },
	{ "hitless_words",			0, NULL },
	{ "hit_format",				0, NULL },
	{ "rt_field",				KEY_LIST, NULL },
	{ "rt_attr_uint",			KEY_LIST, NULL },
	{ "rt_attr_bigint",			KEY_LIST, NULL },
	{ "rt_attr_float",			KEY_LIST, NULL },
	{ "rt_attr_timestamp",		KEY_LIST, NULL },
	{ "rt_attr_string",			KEY_LIST, NULL },
	{ "rt_attr_multi",			KEY_LIST, NULL },
	{ "rt_attr_multi_64",		KEY_LIST, NULL },
	{ "rt_mem_limit",			0, NULL },
	{ "dict",					0, NULL },
	{ "index_sp",				0, NULL },
	{ "index_zones",			0, NULL },
	{ "blend_mode",				0, NULL },
	{ NULL,						0, NULL }	// end-of-table marker; the key lookup loop stops on the NULL name
};
261 
/// allowed keys for indexer section
/// (CSphConfigParser::ValidateKey looks keys up here when the section type is "indexer")
static KeyDesc_t g_dKeysIndexer[] =
{
	{ "mem_limit",				0, NULL },
	{ "max_iops",				0, NULL },
	{ "max_iosize",				0, NULL },
	{ "max_xmlpipe2_field",		0, NULL },
	{ "max_file_field_buffer",	0, NULL },
	{ "write_buffer",			0, NULL },
	{ "on_file_field_error",	0, NULL },
	{ NULL,						0, NULL }	// end-of-table marker; the key lookup loop stops on the NULL name
};
274 
/// allowed keys for searchd section
/// (CSphConfigParser::ValidateKey looks keys up here when the section type is "searchd")
static KeyDesc_t g_dKeysSearchd[] =
{
	{ "address",				KEY_DEPRECATED, "listen" },
	{ "port",					0, NULL },
	{ "listen",					KEY_LIST, NULL },
	{ "log",					0, NULL },
	{ "query_log",				0, NULL },
	{ "read_timeout",			0, NULL },
	{ "client_timeout",			0, NULL },
	{ "max_children",			0, NULL },
	{ "pid_file",				0, NULL },
	{ "max_matches",			0, NULL },
	{ "seamless_rotate",		0, NULL },
	{ "preopen_indexes",		0, NULL },
	{ "unlink_old",				0, NULL },
	{ "ondisk_dict_default",	0, NULL },
	{ "attr_flush_period",		0, NULL },
	{ "max_packet_size",		0, NULL },
	{ "mva_updates_pool",		0, NULL },
	// NOTE: the only deprecated key in these tables that names no replacement (m_sExtra is NULL)
	{ "crash_log_path",			KEY_DEPRECATED, NULL },
	{ "max_filters",			0, NULL },
	{ "max_filter_values",		0, NULL },
	{ "listen_backlog",			0, NULL },
	{ "read_buffer",			0, NULL },
	{ "read_unhinted",			0, NULL },
	{ "max_batch_queries",		0, NULL },
	{ "subtree_docs_cache",		0, NULL },
	{ "subtree_hits_cache",		0, NULL },
	{ "workers",				0, NULL },
	{ "prefork",				0, NULL },
	{ "dist_threads",			0, NULL },
	{ "binlog_flush",			0, NULL },
	{ "binlog_path",			0, NULL },
	{ "binlog_max_log_size",	0, NULL },
	{ "thread_stack",			0, NULL },
	{ "expansion_limit",		0, NULL },
	{ "compat_sphinxql_magics",	0, NULL },
	{ "rt_flush_period",		0, NULL },
	{ "query_log_format",		0, NULL },
	{ "mysql_version_string",	0, NULL },
	{ "plugin_dir",				0, NULL },
	{ "collation_server",		0, NULL },
	{ "collation_libc_locale",	0, NULL },
	{ "watchdog",				0, NULL },
	{ "prefork_rotation_throttle", 0, NULL },
	{ NULL,						0, NULL }	// end-of-table marker; the key lookup loop stops on the NULL name
};
323 
324 //////////////////////////////////////////////////////////////////////////
325 
CSphConfigParser()326 CSphConfigParser::CSphConfigParser ()
327 	: m_sFileName ( "" )
328 	, m_iLine ( -1 )
329 {
330 }
331 
332 
IsPlainSection(const char * sKey)333 bool CSphConfigParser::IsPlainSection ( const char * sKey )
334 {
335 	if ( !strcasecmp ( sKey, "indexer" ) )		return true;
336 	if ( !strcasecmp ( sKey, "searchd" ) )		return true;
337 	if ( !strcasecmp ( sKey, "search" ) )		return true;
338 	return false;
339 }
340 
341 
IsNamedSection(const char * sKey)342 bool CSphConfigParser::IsNamedSection ( const char * sKey )
343 {
344 	if ( !strcasecmp ( sKey, "source" ) )		return true;
345 	if ( !strcasecmp ( sKey, "index" ) )		return true;
346 	return false;
347 }
348 
349 
AddSection(const char * sType,const char * sName)350 bool CSphConfigParser::AddSection ( const char * sType, const char * sName )
351 {
352 	m_sSectionType = sType;
353 	m_sSectionName = sName;
354 
355 	if ( !m_tConf.Exists ( m_sSectionType ) )
356 		m_tConf.Add ( CSphConfigType(), m_sSectionType ); // FIXME! be paranoid, verify that it returned true
357 
358 	if ( m_tConf[m_sSectionType].Exists ( m_sSectionName ) )
359 	{
360 		snprintf ( m_sError, sizeof(m_sError), "section '%s' (type='%s') already exists", sName, sType );
361 		return false;
362 	}
363 	m_tConf[m_sSectionType].Add ( CSphConfigSection(), m_sSectionName ); // FIXME! be paranoid, verify that it returned true
364 
365 	return true;
366 }
367 
368 
AddKey(const char * sKey,char * sValue)369 void CSphConfigParser::AddKey ( const char * sKey, char * sValue )
370 {
371 	assert ( m_tConf.Exists ( m_sSectionType ) );
372 	assert ( m_tConf[m_sSectionType].Exists ( m_sSectionName ) );
373 
374 	sValue = trim ( sValue );
375 	CSphConfigSection & tSec = m_tConf[m_sSectionType][m_sSectionName];
376 	if ( tSec(sKey) )
377 	{
378 		if ( tSec[sKey].m_bTag )
379 		{
380 			// override value or list with a new value
381 			SafeDelete ( tSec[sKey].m_pNext ); // only leave the first array element
382 			tSec[sKey] = sValue; // update its value
383 			tSec[sKey].m_bTag = false; // mark it as overridden
384 
385 		} else
386 		{
387 			// chain to tail, to keep the order
388 			CSphVariant * pTail = &tSec[sKey];
389 			while ( pTail->m_pNext )
390 				pTail = pTail->m_pNext;
391 			pTail->m_pNext = new CSphVariant ( sValue );
392 		}
393 
394 	} else
395 	{
396 		// just add
397 		tSec.Add ( sValue, sKey ); // FIXME! be paranoid, verify that it returned true
398 	}
399 }
400 
401 
ValidateKey(const char * sKey)402 bool CSphConfigParser::ValidateKey ( const char * sKey )
403 {
404 	// get proper descriptor table
405 	// OPTIMIZE! move lookup to AddSection
406 	const KeyDesc_t * pDesc = NULL;
407 	if ( m_sSectionType=="source" )			pDesc = g_dKeysSource;
408 	else if ( m_sSectionType=="index" )		pDesc = g_dKeysIndex;
409 	else if ( m_sSectionType=="indexer" )	pDesc = g_dKeysIndexer;
410 	else if ( m_sSectionType=="searchd" )	pDesc = g_dKeysSearchd;
411 	if ( !pDesc )
412 	{
413 		snprintf ( m_sError, sizeof(m_sError), "unknown section type '%s'", m_sSectionType.cstr() );
414 		return false;
415 	}
416 
417 	// check if the key is known
418 	while ( pDesc->m_sKey && strcasecmp ( pDesc->m_sKey, sKey ) )
419 		pDesc++;
420 	if ( !pDesc->m_sKey )
421 	{
422 		snprintf ( m_sError, sizeof(m_sError), "unknown key name '%s'", sKey );
423 		return false;
424 	}
425 
426 	// warn about deprecate keys
427 	if ( pDesc->m_iFlags & KEY_DEPRECATED )
428 		if ( ++m_iWarnings<=WARNS_THRESH )
429 			fprintf ( stdout, "WARNING: key '%s' is deprecated in %s line %d; use '%s' instead.\n", sKey, m_sFileName.cstr(), m_iLine, pDesc->m_sExtra );
430 
431 	// warn about list/non-list keys
432 	if (!( pDesc->m_iFlags & KEY_LIST ))
433 	{
434 		CSphConfigSection & tSec = m_tConf[m_sSectionType][m_sSectionName];
435 		if ( tSec(sKey) && !tSec[sKey].m_bTag )
436 			if ( ++m_iWarnings<=WARNS_THRESH )
437 				fprintf ( stdout, "WARNING: key '%s' is not multi-value; value in %s line %d will be ignored.\n", sKey, m_sFileName.cstr(), m_iLine );
438 	}
439 
440 	return true;
441 }
442 
443 #if !USE_WINDOWS
444 
TryToExec(char * pBuffer,const char * szFilename,CSphVector<char> & dResult,char * sError,int iErrorLen)445 bool TryToExec ( char * pBuffer, const char * szFilename, CSphVector<char> & dResult, char * sError, int iErrorLen )
446 {
447 	int dPipe[2] = { -1, -1 };
448 
449 	if ( pipe ( dPipe ) )
450 	{
451 		snprintf ( sError, iErrorLen, "pipe() failed (error=%s)", strerror(errno) );
452 		return false;
453 	}
454 
455 	pBuffer = trim ( pBuffer );
456 
457 	int iRead = dPipe[0];
458 	int iWrite = dPipe[1];
459 
460 	int iChild = fork();
461 
462 	if ( iChild==0 )
463 	{
464 		close ( iRead );
465 		close ( STDOUT_FILENO );
466 		dup2 ( iWrite, STDOUT_FILENO );
467 
468 		char * pPtr = pBuffer;
469 		char * pArgs = NULL;
470 		while ( *pPtr )
471 		{
472 			if ( sphIsSpace ( *pPtr ) )
473 			{
474 				*pPtr = '\0';
475 				pArgs = trim ( pPtr+1 );
476 				break;
477 			}
478 
479 			pPtr++;
480 		}
481 
482 		if ( pArgs )
483 			execl ( pBuffer, pBuffer, pArgs, szFilename, (char*)NULL );
484 		else
485 			execl ( pBuffer, pBuffer, szFilename, (char*)NULL );
486 
487 		exit ( 1 );
488 
489 	} else if ( iChild==-1 )
490 	{
491 		snprintf ( sError, iErrorLen, "fork failed: [%d] %s", errno, strerror(errno) );
492 		return false;
493 	}
494 
495 	close ( iWrite );
496 
497 	int iBytesRead, iTotalRead = 0;
498 	const int BUFFER_SIZE = 65536;
499 
500 	dResult.Reset ();
501 
502 	do
503 	{
504 		dResult.Resize ( iTotalRead + BUFFER_SIZE );
505 		for ( ;; )
506 		{
507 			iBytesRead = read ( iRead, (void*)&(dResult [iTotalRead]), BUFFER_SIZE );
508 			if ( iBytesRead==-1 && errno==EINTR ) // we can get SIGCHLD just before eof
509 				continue;
510 			break;
511 		}
512 		iTotalRead += iBytesRead;
513 	}
514 	while ( iBytesRead > 0 );
515 	close ( iRead );
516 
517 	int iStatus, iResult;
518 	do
519 	{
520 		// can be interrupted by pretty much anything (e.g. SIGCHLD from other searchd children)
521 		iResult = waitpid ( iChild, &iStatus, 0 );
522 
523 		// they say this can happen if child exited and SIGCHLD was ignored
524 		// a cleaner one would be to temporary handle it here, but can we be bothered
525 		if ( iResult==-1 && errno==ECHILD )
526 		{
527 			iResult = iChild;
528 			iStatus = 0;
529 		}
530 
531 		if ( iResult==-1 && errno!=EINTR )
532 		{
533 			snprintf ( sError, iErrorLen, "waitpid() failed: [%d] %s", errno, strerror(errno) );
534 			return false;
535 		}
536 	}
537 	while ( iResult!=iChild );
538 
539 	if ( WIFEXITED ( iStatus ) && WEXITSTATUS ( iStatus ) )
540 	{
541 		// FIXME? read stderr and log that too
542 		snprintf ( sError, iErrorLen, "error executing '%s' status = %d", pBuffer, WEXITSTATUS ( iStatus ) );
543 		return false;
544 	}
545 
546 	if ( WIFSIGNALED ( iStatus ) )
547 	{
548 		snprintf ( sError, iErrorLen, "error executing '%s', killed by signal %d", pBuffer, WTERMSIG ( iStatus ) );
549 		return false;
550 	}
551 
552 	if ( iBytesRead < 0 )
553 	{
554 		snprintf ( sError, iErrorLen, "pipe read error: [%d] %s", errno, strerror(errno) );
555 		return false;
556 	}
557 
558 	dResult.Resize ( iTotalRead + 1 );
559 	dResult [iTotalRead] = '\0';
560 
561 	return true;
562 }
563 
TryToExec(char * pBuffer,const char * szFilename,CSphVector<char> & dResult)564 bool CSphConfigParser::TryToExec ( char * pBuffer, const char * szFilename, CSphVector<char> & dResult )
565 {
566 	return ::TryToExec ( pBuffer, szFilename, dResult, m_sError, sizeof(m_sError) );
567 }
568 #endif
569 
570 
GetBufferString(char * szDest,int iMax,const char * & szSource)571 char * CSphConfigParser::GetBufferString ( char * szDest, int iMax, const char * & szSource )
572 {
573 	int nCopied = 0;
574 
575 	while ( nCopied < iMax-1 && szSource[nCopied] && ( nCopied==0 || szSource[nCopied-1]!='\n' ) )
576 	{
577 		szDest [nCopied] = szSource [nCopied];
578 		nCopied++;
579 	}
580 
581 	if ( !nCopied )
582 		return NULL;
583 
584 	szSource += nCopied;
585 	szDest [nCopied] = '\0';
586 
587 	return szDest;
588 }
589 
ReParse(const char * sFileName,const char * pBuffer)590 bool CSphConfigParser::ReParse ( const char * sFileName, const char * pBuffer )
591 {
592 	CSphConfig tOldConfig = m_tConf;
593 	m_tConf.Reset();
594 	if ( Parse ( sFileName, pBuffer ) )
595 		return true;
596 	m_tConf = tOldConfig;
597 	return false;
598 }
599 
Parse(const char * sFileName,const char * pBuffer)600 bool CSphConfigParser::Parse ( const char * sFileName, const char * pBuffer )
601 {
602 	const int L_STEPBACK	= 16;
603 	const int L_TOKEN		= 64;
604 	const int L_BUFFER		= 8192;
605 
606 	FILE * fp = NULL;
607 	if ( !pBuffer )
608 	{
609 		// open file
610 		fp = fopen ( sFileName, "rb" );
611 		if ( !fp )
612 			return false;
613 	}
614 
615 	// init parser
616 	m_sFileName = sFileName;
617 	m_iLine = 0;
618 	m_iWarnings = 0;
619 
620 	char * p = NULL;
621 	char * pEnd = NULL;
622 
623 	char sBuf [ L_BUFFER ] = { 0 };
624 
625 	char sToken [ L_TOKEN ] = { 0 };
626 	int iToken = 0;
627 	int iCh = -1;
628 
629 	enum { S_TOP, S_SKIP2NL, S_TOK, S_TYPE, S_SEC, S_CHR, S_VALUE, S_SECNAME, S_SECBASE, S_KEY } eState = S_TOP, eStack[8];
630 	int iStack = 0;
631 
632 	int iValue = 0, iValueMax = 65535;
633 	char * sValue = new char [ iValueMax+1 ];
634 
635 	#define LOC_ERROR(_msg) { strncpy ( m_sError, _msg, sizeof(m_sError) ); break; }
636 	#define LOC_ERROR2(_msg,_a) { snprintf ( m_sError, sizeof(m_sError), _msg, _a ); break; }
637 	#define LOC_ERROR3(_msg,_a,_b) { snprintf ( m_sError, sizeof(m_sError), _msg, _a, _b ); break; }
638 	#define LOC_ERROR4(_msg,_a,_b,_c) { snprintf ( m_sError, sizeof(m_sError), _msg, _a, _b, _c ); break; }
639 
640 	#define LOC_PUSH(_new) { assert ( iStack<int(sizeof(eStack)/sizeof(eState)) ); eStack[iStack++] = eState; eState = _new; }
641 	#define LOC_POP() { assert ( iStack>0 ); eState = eStack[--iStack]; }
642 	#define LOC_BACK() { p--; }
643 
644 	m_sError[0] = '\0';
645 
646 	for ( ; ; p++ )
647 	{
648 		// if this line is over, load next line
649 		if ( p>=pEnd )
650 		{
651 			char * szResult = pBuffer ? GetBufferString ( sBuf, L_BUFFER, pBuffer ) : fgets ( sBuf, L_BUFFER, fp );
652 			if ( !szResult )
653 				break; // FIXME! check for read error
654 
655 			m_iLine++;
656 			int iLen = strlen(sBuf);
657 			if ( iLen<=0 )
658 				LOC_ERROR ( "internal error; fgets() returned empty string" );
659 
660 			p = sBuf;
661 			pEnd = sBuf + iLen;
662 			if ( pEnd[-1]!='\n' )
663 			{
664 				if ( iLen==L_BUFFER-1 )
665 					LOC_ERROR ( "line too long" );
666 			}
667 		}
668 
669 		// handle S_TOP state
670 		if ( eState==S_TOP )
671 		{
672 			if ( isspace(*p) )				continue;
673 
674 			if ( *p=='#' )
675 			{
676 #if !USE_WINDOWS
677 				if ( !pBuffer && m_iLine==1 && p==sBuf && p[1]=='!' )
678 				{
679 					CSphVector<char> dResult;
680 					if ( TryToExec ( p+2, sFileName, dResult ) )
681 						Parse ( sFileName, &dResult[0] );
682 					break;
683 				} else
684 #endif
685 				{
686 					LOC_PUSH ( S_SKIP2NL );
687 					continue;
688 				}
689 			}
690 
691 			if ( !sphIsAlpha(*p) )			LOC_ERROR ( "invalid token" );
692 											iToken = 0; LOC_PUSH ( S_TYPE ); LOC_PUSH ( S_TOK ); LOC_BACK(); continue;
693 		}
694 
695 		// handle S_SKIP2NL state
696 		if ( eState==S_SKIP2NL )
697 		{
698 			LOC_POP ();
699 			p = pEnd;
700 			continue;
701 		}
702 
703 		// handle S_TOK state
704 		if ( eState==S_TOK )
705 		{
706 			if ( !iToken && !sphIsAlpha(*p) )LOC_ERROR ( "internal error (non-alpha in S_TOK pos 0)" );
707 			if ( iToken==sizeof(sToken) )	LOC_ERROR ( "token too long" );
708 			if ( !sphIsAlpha(*p) )			{ LOC_POP (); sToken [ iToken ] = '\0'; iToken = 0; LOC_BACK(); continue; }
709 			if ( !iToken )					{ sToken[0] = '\0'; }
710 											sToken [ iToken++ ] = *p; continue;
711 		}
712 
713 		// handle S_TYPE state
714 		if ( eState==S_TYPE )
715 		{
716 			if ( isspace(*p) )				continue;
717 			if ( *p=='#' )					{ LOC_PUSH ( S_SKIP2NL ); continue; }
718 			if ( !sToken[0] )				{ LOC_ERROR ( "internal error (empty token in S_TYPE)" ); }
719 			if ( IsPlainSection(sToken) )	{ if ( !AddSection ( sToken, sToken ) ) break; sToken[0] = '\0'; LOC_POP (); LOC_PUSH ( S_SEC ); LOC_PUSH ( S_CHR ); iCh = '{'; LOC_BACK(); continue; }
720 			if ( IsNamedSection(sToken) )	{ m_sSectionType = sToken; sToken[0] = '\0'; LOC_POP (); LOC_PUSH ( S_SECNAME ); LOC_BACK(); continue; }
721 											LOC_ERROR2 ( "invalid section type '%s'", sToken );
722 		}
723 
724 		// handle S_CHR state
725 		if ( eState==S_CHR )
726 		{
727 			if ( isspace(*p) )				continue;
728 			if ( *p=='#' )					{ LOC_PUSH ( S_SKIP2NL ); continue; }
729 			if ( *p!=iCh )					LOC_ERROR3 ( "expected '%c', got '%c'", iCh, *p );
730 											LOC_POP (); continue;
731 		}
732 
733 		// handle S_SEC state
734 		if ( eState==S_SEC )
735 		{
736 			if ( isspace(*p) )				continue;
737 			if ( *p=='#' )					{ LOC_PUSH ( S_SKIP2NL ); continue; }
738 			if ( *p=='}' )					{ LOC_POP (); continue; }
739 			if ( sphIsAlpha(*p) )			{ LOC_PUSH ( S_KEY ); LOC_PUSH ( S_TOK ); LOC_BACK(); iValue = 0; sValue[0] = '\0'; continue; }
740 											LOC_ERROR2 ( "section contents: expected token, got '%c'", *p );
741 		}
742 
743 		// handle S_KEY state
744 		if ( eState==S_KEY )
745 		{
746 			// validate the key
747 			if ( !ValidateKey ( sToken ) )
748 				break;
749 
750 			// an assignment operator and a value must follow
751 			LOC_POP (); LOC_PUSH ( S_VALUE ); LOC_PUSH ( S_CHR ); iCh = '=';
752 			LOC_BACK(); // because we did not work the char at all
753 			continue;
754 		}
755 
756 		// handle S_VALUE state
757 		if ( eState==S_VALUE )
758 		{
759 			if ( *p=='\n' )					{ AddKey ( sToken, sValue ); iValue = 0; LOC_POP (); continue; }
760 			if ( *p=='#' )					{ AddKey ( sToken, sValue ); iValue = 0; LOC_POP (); LOC_PUSH ( S_SKIP2NL ); continue; }
761 			if ( *p=='\\' )
762 			{
763 				// backslash at the line end: continuation operator; let the newline be unhanlded
764 				if ( p[1]=='\r' || p[1]=='\n' ) { LOC_PUSH ( S_SKIP2NL ); continue; }
765 
766 				// backslash before number sign: comment start char escaping; advance and pass it
767 				if ( p[1]=='#' ) { p++; }
768 
769 				// otherwise: just a char, pass it
770 			}
771 			if ( iValue<iValueMax )			{ sValue[iValue++] = *p; sValue[iValue] = '\0'; }
772 											continue;
773 		}
774 
775 		// handle S_SECNAME state
776 		if ( eState==S_SECNAME )
777 		{
778 			if ( isspace(*p) )					{ continue; }
779 			if ( !sToken[0]&&!sphIsAlpha(*p))	{ LOC_ERROR2 ( "named section: expected name, got '%c'", *p ); }
780 
781 			if ( !sToken[0] )				{ LOC_PUSH ( S_TOK ); LOC_BACK(); continue; }
782 											if ( !AddSection ( m_sSectionType.cstr(), sToken ) ) break; sToken[0] = '\0';
783 			if ( *p==':' )					{ eState = S_SECBASE; continue; }
784 			if ( *p=='{' )					{ eState = S_SEC; continue; }
785 											LOC_ERROR2 ( "named section: expected ':' or '{', got '%c'", *p );
786 		}
787 
788 		// handle S_SECBASE state
789 		if ( eState==S_SECBASE )
790 		{
791 			if ( isspace(*p) )					{ continue; }
792 			if ( !sToken[0]&&!sphIsAlpha(*p))	{ LOC_ERROR2 ( "named section: expected parent name, got '%c'", *p ); }
793 			if ( !sToken[0] )					{ LOC_PUSH ( S_TOK ); LOC_BACK(); continue; }
794 
795 			// copy the section
796 			assert ( m_tConf.Exists ( m_sSectionType ) );
797 
798 			if ( !m_tConf [ m_sSectionType ].Exists ( sToken ) )
799 				LOC_ERROR4 ( "inherited section '%s': parent doesn't exist (parent name='%s', type='%s')",
800 					m_sSectionName.cstr(), sToken, m_sSectionType.cstr() );
801 
802 			CSphConfigSection & tDest = m_tConf [ m_sSectionType ][ m_sSectionName ];
803 			tDest = m_tConf [ m_sSectionType ][ sToken ];
804 
805 			// mark all values in the target section as "to be overridden"
806 			tDest.IterateStart ();
807 			while ( tDest.IterateNext() )
808 				tDest.IterateGet().m_bTag = true;
809 
810 			LOC_BACK();
811 			eState = S_SEC;
812 			LOC_PUSH ( S_CHR );
813 			iCh = '{';
814 			continue;
815 		}
816 
817 		LOC_ERROR2 ( "internal error (unhandled state %d)", eState );
818 	}
819 
820 	#undef LOC_POP
821 	#undef LOC_PUSH
822 	#undef LOC_ERROR
823 
824 	if ( !pBuffer )
825 		fclose ( fp );
826 
827 	SafeDeleteArray ( sValue );
828 
829 	if ( m_iWarnings>WARNS_THRESH )
830 		fprintf ( stdout, "WARNING: %d more warnings skipped.\n", m_iWarnings-WARNS_THRESH );
831 
832 	if ( strlen(m_sError) )
833 	{
834 		int iCol = (int)(p-sBuf+1);
835 
836 		int iCtx = Min ( L_STEPBACK, iCol ); // error context is upto L_STEPBACK chars back, but never going to prev line
837 		const char * sCtx = p-iCtx+1;
838 		if ( sCtx<sBuf )
839 			sCtx = sBuf;
840 
841 		char sStepback [ L_STEPBACK+1 ];
842 		memcpy ( sStepback, sCtx, iCtx );
843 		sStepback[iCtx] = '\0';
844 
845 		fprintf ( stdout, "ERROR: %s in %s line %d col %d.\n", m_sError, m_sFileName.cstr(), m_iLine, iCol );
846 		return false;
847 	}
848 
849 	return true;
850 }
851 
852 /////////////////////////////////////////////////////////////////////////////
853 
sphConfTokenizer(const CSphConfigSection & hIndex,CSphTokenizerSettings & tSettings,CSphString & sError)854 bool sphConfTokenizer ( const CSphConfigSection & hIndex, CSphTokenizerSettings & tSettings, CSphString & sError )
855 {
856 	tSettings.m_iNgramLen = Max ( hIndex.GetInt ( "ngram_len" ), 0 );
857 
858 	if ( !hIndex("charset_type") || hIndex["charset_type"]=="sbcs" )
859 	{
860 		tSettings.m_iType = TOKENIZER_SBCS;
861 
862 	} else if ( hIndex["charset_type"]=="utf-8" )
863 	{
864 		tSettings.m_iType = TOKENIZER_UTF8;
865 		if ( hIndex ( "ngram_chars" ) )
866 		{
867 			if ( tSettings.m_iNgramLen )
868 				tSettings.m_iType = TOKENIZER_NGRAM;
869 			else
870 				sphWarning ( "ngram_chars specified, but ngram_len=0; IGNORED" );
871 		}
872 
873 	} else
874 	{
875 		sError.SetSprintf ( "unknown charset type '%s'", hIndex["charset_type"].cstr() );
876 		return false;
877 	}
878 
879 	tSettings.m_sCaseFolding = hIndex.GetStr ( "charset_table" );
880 	tSettings.m_iMinWordLen = Max ( hIndex.GetInt ( "min_word_len" ), 0 );
881 	tSettings.m_sNgramChars = hIndex.GetStr ( "ngram_chars" );
882 	tSettings.m_sSynonymsFile = hIndex.GetStr ( "exceptions" ); // new option name
883 	if ( tSettings.m_sSynonymsFile.IsEmpty() )
884 		tSettings.m_sSynonymsFile = hIndex.GetStr ( "synonyms" ); // deprecated option name
885 	tSettings.m_sIgnoreChars = hIndex.GetStr ( "ignore_chars" );
886 	tSettings.m_sBlendChars = hIndex.GetStr ( "blend_chars" );
887 	tSettings.m_sBlendMode = hIndex.GetStr ( "blend_mode" );
888 
889 	// phrase boundaries
890 	int iBoundaryStep = Max ( hIndex.GetInt ( "phrase_boundary_step" ), -1 );
891 	if ( iBoundaryStep!=0 )
892 		tSettings.m_sBoundary = hIndex.GetStr ( "phrase_boundary" );
893 
894 	return true;
895 }
896 
sphConfDictionary(const CSphConfigSection & hIndex,CSphDictSettings & tSettings)897 void sphConfDictionary ( const CSphConfigSection & hIndex, CSphDictSettings & tSettings )
898 {
899 	tSettings.m_sMorphology = hIndex.GetStr ( "morphology" );
900 	tSettings.m_sStopwords = hIndex.GetStr ( "stopwords" );
901 	tSettings.m_sWordforms = hIndex.GetStr ( "wordforms" );
902 	tSettings.m_iMinStemmingLen = hIndex.GetInt ( "min_stemming_len", 1 );
903 
904 	if ( hIndex("dict") )
905 	{
906 		tSettings.m_bWordDict = false; // default to crc
907 		if ( hIndex["dict"]=="keywords" )
908 			tSettings.m_bWordDict = true;
909 		else if ( hIndex["dict"]!="crc" )
910 			fprintf ( stdout, "WARNING: unknown dict=%s, defaulting to crc\n", hIndex["dict"].cstr() );
911 	}
912 }
913 
914 
sphConfIndex(const CSphConfigSection & hIndex,CSphIndexSettings & tSettings,CSphString & sError)915 bool sphConfIndex ( const CSphConfigSection & hIndex, CSphIndexSettings & tSettings, CSphString & sError )
916 {
917 	// misc settings
918 	tSettings.m_iMinPrefixLen = Max ( hIndex.GetInt ( "min_prefix_len" ), 0 );
919 	tSettings.m_iMinInfixLen = Max ( hIndex.GetInt ( "min_infix_len" ), 0 );
920 	tSettings.m_iBoundaryStep = Max ( hIndex.GetInt ( "phrase_boundary_step" ), -1 );
921 	tSettings.m_bIndexExactWords = hIndex.GetInt ( "index_exact_words" )!=0;
922 	tSettings.m_iOvershortStep = Min ( Max ( hIndex.GetInt ( "overshort_step", 1 ), 0 ), 1 );
923 	tSettings.m_iStopwordStep = Min ( Max ( hIndex.GetInt ( "stopword_step", 1 ), 0 ), 1 );
924 
925 	// prefix/infix fields
926 	CSphString sFields;
927 
928 	sFields = hIndex.GetStr ( "prefix_fields" );
929 	sFields.ToLower();
930 	sphSplit ( tSettings.m_dPrefixFields, sFields.cstr() );
931 
932 	sFields = hIndex.GetStr ( "infix_fields" );
933 	sFields.ToLower();
934 	sphSplit ( tSettings.m_dInfixFields, sFields.cstr() );
935 
936 	if ( tSettings.m_iMinPrefixLen==0 && tSettings.m_dPrefixFields.GetLength()!=0 )
937 	{
938 		fprintf ( stdout, "WARNING: min_prefix_len=0, prefix_fields ignored\n" );
939 		tSettings.m_dPrefixFields.Reset();
940 	}
941 
942 	if ( tSettings.m_iMinInfixLen==0 && tSettings.m_dInfixFields.GetLength()!=0 )
943 	{
944 		fprintf ( stdout, "WARNING: min_infix_len=0, infix_fields ignored\n" );
945 		tSettings.m_dInfixFields.Reset();
946 	}
947 
948 	// the only way we could have both prefixes and infixes enabled is when specific field subsets are configured
949 	if ( tSettings.m_iMinInfixLen>0 && tSettings.m_iMinPrefixLen>0
950 		&& ( !tSettings.m_dPrefixFields.GetLength() || !tSettings.m_dInfixFields.GetLength() ) )
951 	{
952 		sError.SetSprintf ( "prefixes and infixes can not both be enabled on all fields" );
953 		return false;
954 	}
955 
956 	tSettings.m_dPrefixFields.Uniq();
957 	tSettings.m_dInfixFields.Uniq();
958 
959 	ARRAY_FOREACH ( i, tSettings.m_dPrefixFields )
960 		if ( tSettings.m_dInfixFields.Contains ( tSettings.m_dPrefixFields[i] ) )
961 	{
962 		sError.SetSprintf ( "field '%s' marked both as prefix and infix", tSettings.m_dPrefixFields[i].cstr() );
963 		return false;
964 	}
965 
966 	// html stripping
967 	if ( hIndex ( "html_strip" ) )
968 	{
969 		tSettings.m_bHtmlStrip = hIndex.GetInt ( "html_strip" )!=0;
970 		tSettings.m_sHtmlIndexAttrs = hIndex.GetStr ( "html_index_attrs" );
971 		tSettings.m_sHtmlRemoveElements = hIndex.GetStr ( "html_remove_elements" );
972 	}
973 
974 	// docinfo
975 	tSettings.m_eDocinfo = SPH_DOCINFO_EXTERN;
976 	if ( hIndex("docinfo") )
977 	{
978 		if ( hIndex["docinfo"]=="none" )		tSettings.m_eDocinfo = SPH_DOCINFO_NONE;
979 		else if ( hIndex["docinfo"]=="inline" )	tSettings.m_eDocinfo = SPH_DOCINFO_INLINE;
980 		else if ( hIndex["docinfo"]=="extern" )	tSettings.m_eDocinfo = SPH_DOCINFO_EXTERN;
981 		else
982 			fprintf ( stdout, "WARNING: unknown docinfo=%s, defaulting to extern\n", hIndex["docinfo"].cstr() );
983 	}
984 
985 	// hit format
986 	// TODO! add the description into documentation.
987 	tSettings.m_eHitFormat = SPH_HIT_FORMAT_INLINE;
988 	if ( hIndex("hit_format") )
989 	{
990 		if ( hIndex["hit_format"]=="plain" )		tSettings.m_eHitFormat = SPH_HIT_FORMAT_PLAIN;
991 		else if ( hIndex["hit_format"]=="inline" )	tSettings.m_eHitFormat = SPH_HIT_FORMAT_INLINE;
992 		else
993 			fprintf ( stdout, "WARNING: unknown hit_format=%s, defaulting to inline\n", hIndex["hit_format"].cstr() );
994 	}
995 
996 	// hit-less indices
997 	if ( hIndex("hitless_words") )
998 	{
999 		const CSphString & sValue = hIndex["hitless_words"];
1000 		if ( sValue=="all" )
1001 		{
1002 			tSettings.m_eHitless = SPH_HITLESS_ALL;
1003 		} else
1004 		{
1005 			tSettings.m_eHitless = SPH_HITLESS_SOME;
1006 			tSettings.m_sHitlessFiles = sValue;
1007 		}
1008 	}
1009 
1010 	// sentence and paragraph indexing
1011 	tSettings.m_bIndexSP = ( hIndex.GetInt ( "index_sp" )!=0 );
1012 	tSettings.m_sZones = hIndex.GetStr ( "index_zones" );
1013 
1014 	// all good
1015 	return true;
1016 }
1017 
1018 
sphFixupIndexSettings(CSphIndex * pIndex,const CSphConfigSection & hIndex,CSphString & sError)1019 bool sphFixupIndexSettings ( CSphIndex * pIndex, const CSphConfigSection & hIndex, CSphString & sError )
1020 {
1021 	bool bTokenizerSpawned = false;
1022 
1023 	if ( !pIndex->GetTokenizer () )
1024 	{
1025 		CSphTokenizerSettings tSettings;
1026 		if ( !sphConfTokenizer ( hIndex, tSettings, sError ) )
1027 			return false;
1028 
1029 		ISphTokenizer * pTokenizer = ISphTokenizer::Create ( tSettings, sError );
1030 		if ( !pTokenizer )
1031 			return false;
1032 
1033 		bTokenizerSpawned = true;
1034 		pIndex->SetTokenizer ( pTokenizer );
1035 	}
1036 
1037 	if ( !pIndex->GetDictionary () )
1038 	{
1039 		CSphDictSettings tSettings;
1040 		if ( pIndex->m_bId32to64 )
1041 			tSettings.m_bCrc32 = true;
1042 		sphConfDictionary ( hIndex, tSettings );
1043 		CSphDict * pDict = sphCreateDictionaryCRC ( tSettings, pIndex->GetTokenizer (), sError, pIndex->GetName() );
1044 		if ( !pDict )
1045 			return false;
1046 
1047 		pIndex->SetDictionary ( pDict );
1048 	}
1049 
1050 	if ( bTokenizerSpawned )
1051 	{
1052 		ISphTokenizer * pTokenizer = pIndex->LeakTokenizer ();
1053 		ISphTokenizer * pTokenFilter = ISphTokenizer::CreateTokenFilter ( pTokenizer, pIndex->GetDictionary ()->GetMultiWordforms () );
1054 		pIndex->SetTokenizer ( pTokenFilter ? pTokenFilter : pTokenizer );
1055 	}
1056 
1057 	if ( !pIndex->IsStripperInited () )
1058 	{
1059 		CSphIndexSettings tSettings = pIndex->GetSettings ();
1060 
1061 		if ( hIndex ( "html_strip" ) )
1062 		{
1063 			tSettings.m_bHtmlStrip = hIndex.GetInt ( "html_strip" )!=0;
1064 			tSettings.m_sHtmlIndexAttrs = hIndex.GetStr ( "html_index_attrs" );
1065 			tSettings.m_sHtmlRemoveElements = hIndex.GetStr ( "html_remove_elements" );
1066 		}
1067 		tSettings.m_sZones = hIndex.GetStr ( "index_zones" );
1068 
1069 		pIndex->Setup ( tSettings );
1070 	}
1071 
1072 	pIndex->PostSetup();
1073 
1074 	return true;
1075 }
1076 
1077 //////////////////////////////////////////////////////////////////////////
1078 
sphLoadConfig(const char * sOptConfig,bool bQuiet,CSphConfigParser & cp)1079 const char * sphLoadConfig ( const char * sOptConfig, bool bQuiet, CSphConfigParser & cp )
1080 {
1081 	// fallback to defaults if there was no explicit config specified
1082 	while ( !sOptConfig )
1083 	{
1084 #ifdef SYSCONFDIR
1085 		sOptConfig = SYSCONFDIR "/sphinx.conf";
1086 		if ( sphIsReadable ( sOptConfig ) )
1087 			break;
1088 #endif
1089 
1090 		sOptConfig = "./sphinx.conf";
1091 		if ( sphIsReadable ( sOptConfig ) )
1092 			break;
1093 
1094 		sOptConfig = NULL;
1095 		break;
1096 	}
1097 
1098 	if ( !sOptConfig )
1099 		sphDie ( "no readable config file (looked in "
1100 #ifdef SYSCONFDIR
1101 		SYSCONFDIR "/sphinx.conf, "
1102 #endif
1103 		"./sphinx.conf)" );
1104 
1105 	if ( !bQuiet )
1106 		fprintf ( stdout, "using config file '%s'...\n", sOptConfig );
1107 
1108 	// load config
1109 	if ( !cp.Parse ( sOptConfig ) )
1110 		sphDie ( "failed to parse config file '%s'", sOptConfig );
1111 
1112 	CSphConfig & hConf = cp.m_tConf;
1113 	if ( !hConf ( "index" ) )
1114 		sphDie ( "no indexes found in config file '%s'", sOptConfig );
1115 
1116 	return sOptConfig;
1117 }
1118 
1119 //////////////////////////////////////////////////////////////////////////
1120 
1121 static SphLogger_fn g_pLogger = NULL;
1122 
Log(ESphLogLevel eLevel,const char * sFmt,va_list ap)1123 inline void Log ( ESphLogLevel eLevel, const char * sFmt, va_list ap )
1124 {
1125 	if ( !g_pLogger ) return;
1126 	( *g_pLogger ) ( eLevel, sFmt, ap );
1127 }
1128 
sphWarning(const char * sFmt,...)1129 void sphWarning ( const char * sFmt, ... )
1130 {
1131 	va_list ap;
1132 	va_start ( ap, sFmt );
1133 	Log ( SPH_LOG_WARNING, sFmt, ap );
1134 	va_end ( ap );
1135 }
1136 
1137 
sphInfo(const char * sFmt,...)1138 void sphInfo ( const char * sFmt, ... )
1139 {
1140 	va_list ap;
1141 	va_start ( ap, sFmt );
1142 	Log ( SPH_LOG_INFO, sFmt, ap );
1143 	va_end ( ap );
1144 }
1145 
sphLogFatal(const char * sFmt,...)1146 void sphLogFatal ( const char * sFmt, ... )
1147 {
1148 	va_list ap;
1149 	va_start ( ap, sFmt );
1150 	Log ( SPH_LOG_FATAL, sFmt, ap );
1151 	va_end ( ap );
1152 }
1153 
sphLogDebug(const char * sFmt,...)1154 void sphLogDebug ( const char * sFmt, ... )
1155 {
1156 	va_list ap;
1157 	va_start ( ap, sFmt );
1158 	Log ( SPH_LOG_DEBUG, sFmt, ap );
1159 	va_end ( ap );
1160 }
1161 
sphLogDebugv(const char * sFmt,...)1162 void sphLogDebugv ( const char * sFmt, ... )
1163 {
1164 	va_list ap;
1165 	va_start ( ap, sFmt );
1166 	Log ( SPH_LOG_VERBOSE_DEBUG, sFmt, ap );
1167 	va_end ( ap );
1168 }
1169 
sphLogDebugvv(const char * sFmt,...)1170 void sphLogDebugvv ( const char * sFmt, ... )
1171 {
1172 	va_list ap;
1173 	va_start ( ap, sFmt );
1174 	Log ( SPH_LOG_VERY_VERBOSE_DEBUG, sFmt, ap );
1175 	va_end ( ap );
1176 }
1177 
sphSetLogger(SphLogger_fn fnLog)1178 void sphSetLogger ( SphLogger_fn fnLog )
1179 {
1180 	g_pLogger = fnLog;
1181 }
1182 
1183 //////////////////////////////////////////////////////////////////////////
1184 // CRASH REPORTING
1185 //////////////////////////////////////////////////////////////////////////
1186 
1187 template <typename Uint>
UItoA(char ** ppOutput,Uint uVal,int iBase=10,int iWidth=0,int iPrec=0,const char cFill=' ')1188 static void UItoA ( char** ppOutput, Uint uVal, int iBase=10, int iWidth=0, int iPrec=0, const char cFill=' ' )
1189 {
1190 	assert ( ppOutput );
1191 	assert ( *ppOutput );
1192 
1193 	const char cDigits[] = "0123456789abcdef";
1194 
1195 	if ( iWidth && iPrec )
1196 	{
1197 		iPrec = iWidth;
1198 		iWidth = 0;
1199 	}
1200 
1201 	if ( !uVal )
1202 	{
1203 		if ( !iPrec && !iWidth )
1204 			*(*ppOutput)++ = cDigits[0];
1205 		else
1206 		{
1207 			while ( iPrec-- )
1208 				*(*ppOutput)++ = cDigits[0];
1209 			if ( iWidth )
1210 			{
1211 				while ( --iWidth )
1212 					*(*ppOutput)++ = cFill;
1213 				*(*ppOutput)++ = cDigits[0];
1214 			}
1215 		}
1216 		return;
1217 	}
1218 
1219 	const BYTE uMaxIndex = 31; // 20 digits for MAX_INT64 in decimal; let it be 31 (32 digits max).
1220 	char CBuf[uMaxIndex+1];
1221 	char *pRes = &CBuf[uMaxIndex];
1222 	char *& pOutput = *ppOutput;
1223 
1224 	while ( uVal )
1225 	{
1226 		*pRes-- = cDigits [ uVal % iBase ];
1227 		uVal /= iBase;
1228 	}
1229 
1230 	BYTE uLen = (BYTE)( uMaxIndex - (pRes-CBuf) );
1231 
1232 	if ( iWidth )
1233 		while ( uLen < iWidth )
1234 		{
1235 			*pOutput++ = cFill;
1236 			iWidth--;
1237 		}
1238 
1239 		if ( iPrec )
1240 		{
1241 			while ( uLen < iPrec )
1242 			{
1243 				*pOutput++=cDigits[0];
1244 				iPrec--;
1245 			}
1246 			iPrec = uLen-iPrec;
1247 		}
1248 
1249 		while ( pRes < CBuf+uMaxIndex-iPrec )
1250 			*pOutput++ = *++pRes;
1251 }
1252 
1253 
sphVSprintf(char * pOutput,const char * sFmt,va_list ap)1254 static int sphVSprintf ( char * pOutput, const char * sFmt, va_list ap )
1255 {
1256 	enum eStates { SNORMAL, SPERCENT, SHAVEFILL, SINWIDTH, SINPREC };
1257 	eStates state = SNORMAL;
1258 	int iPrec = 0;
1259 	int iWidth = 0;
1260 	char cFill = ' ';
1261 	const char * pBegin = pOutput;
1262 	bool bHeadingSpace = true;
1263 
1264 	char c;
1265 	while ( ( c = *sFmt++ )!=0 )
1266 	{
1267 		// handle percent
1268 		if ( c=='%' )
1269 		{
1270 			if ( state==SNORMAL )
1271 			{
1272 				state = SPERCENT;
1273 				iPrec = 0;
1274 				iWidth = 0;
1275 				cFill = ' ';
1276 			} else
1277 			{
1278 				state = SNORMAL;
1279 				*pOutput++ = c;
1280 			}
1281 			continue;
1282 		}
1283 
1284 		// handle regular chars
1285 		if ( state==SNORMAL )
1286 		{
1287 			*pOutput++ = c;
1288 			continue;
1289 		}
1290 
1291 		// handle modifiers
1292 		switch ( c )
1293 		{
1294 		case '0':
1295 			if ( state==SPERCENT )
1296 			{
1297 				cFill = '0';
1298 				state = SHAVEFILL;
1299 				break;
1300 			}
1301 		case '1': case '2': case '3':
1302 		case '4': case '5': case '6':
1303 		case '7': case '8': case '9':
1304 			if ( state==SPERCENT || state==SHAVEFILL )
1305 			{
1306 				state = SINWIDTH;
1307 				iWidth = c - '0';
1308 			} else if ( state==SINWIDTH )
1309 				iWidth = iWidth * 10 + c - '0';
1310 			else if ( state==SINPREC )
1311 				iPrec = iPrec * 10 + c - '0';
1312 			break;
1313 
1314 		case '-':
1315 			if ( state==SPERCENT )
1316 				bHeadingSpace = false;
1317 			else
1318 				state = SNORMAL; // FIXME? means that bad/unhandled syntax with dash will be just ignored
1319 			break;
1320 
1321 		case '.':
1322 			state = SINPREC;
1323 			iPrec = 0;
1324 			break;
1325 
1326 		case 's': // string
1327 			{
1328 				const char * pValue = va_arg ( ap, const char * );
1329 				int iValue = strlen ( pValue );
1330 
1331 				if ( iWidth && bHeadingSpace )
1332 					while ( iValue < iWidth-- )
1333 						*pOutput++ = ' ';
1334 
1335 				if ( iPrec && iPrec < iValue )
1336 					while ( iPrec-- )
1337 						*pOutput++ = *pValue++;
1338 				else
1339 					while ( *pValue )
1340 						*pOutput++ = *pValue++;
1341 
1342 				if ( iWidth && !bHeadingSpace )
1343 					while ( iValue < iWidth-- )
1344 						*pOutput++ = ' ';
1345 
1346 				state = SNORMAL;
1347 				break;
1348 			}
1349 
1350 		case 'p': // pointer
1351 			{
1352 				void * pValue = va_arg ( ap, void * );
1353 				uint64_t uValue = uint64_t ( pValue );
1354 				UItoA ( &pOutput, uValue, 16, iWidth, iPrec, cFill );
1355 				state = SNORMAL;
1356 				break;
1357 			}
1358 
1359 		case 'x': // hex integer
1360 		case 'd': // decimal integer
1361 			{
1362 				DWORD uValue = va_arg ( ap, DWORD );
1363 				UItoA ( &pOutput, uValue, ( c=='x' ) ? 16 : 10, iWidth, iPrec, cFill );
1364 				state = SNORMAL;
1365 				break;
1366 			}
1367 
1368 		case 'l': // decimal int64
1369 			{
1370 				int64_t iValue = va_arg ( ap, int64_t );
1371 				UItoA ( &pOutput, iValue, 10, iWidth, iPrec, cFill );
1372 				state = SNORMAL;
1373 				break;
1374 			}
1375 
1376 		default:
1377 			state = SNORMAL;
1378 			*pOutput++ = c;
1379 		}
1380 	}
1381 
1382 	// final zero to EOL
1383 	*pOutput++ = '\n';
1384 	return pOutput - pBegin;
1385 }
1386 
1387 
/// Write the whole buffer to a file descriptor.
///
/// Keeps calling ::write() until all iSize bytes are written, continuing after
/// short writes and retrying calls interrupted by a signal (EINTR).
/// Returns true when everything was written. Returns false on any other write
/// error (errno is left as set by ::write()), or when ::write() makes no progress.
bool sphWrite ( int iFD, const void * pBuf, size_t iSize )
{
	const char * pCur = (const char *) pBuf;
	size_t iLeft = iSize;

	for ( ;; )
	{
		const long iRes = (long) ::write ( iFD, pCur, iLeft );
		if ( iRes<0 )
		{
			if ( errno==EINTR )
				continue; // interrupted before anything was written; just try again
			return false;
		}

		if ( (size_t)iRes>=iLeft )
			return true;

		// a zero-length result with data still pending means no progress; bail out instead of spinning
		if ( iRes==0 )
			return false;

		// short write; advance and write the rest
		pCur += iRes;
		iLeft -= (size_t)iRes;
	}
}
1392 
1393 
1394 static char g_sSafeInfoBuf [ 1024 ];
1395 
sphSafeInfo(int iFD,const char * sFmt,...)1396 void sphSafeInfo ( int iFD, const char * sFmt, ... )
1397 {
1398 	if ( iFD<0 || !sFmt )
1399 		return;
1400 
1401 	va_list ap;
1402 	va_start ( ap, sFmt );
1403 	int iLen = sphVSprintf ( g_sSafeInfoBuf, sFmt, ap ); // FIXME! make this vsnprintf
1404 	va_end ( ap );
1405 	sphWrite ( iFD, g_sSafeInfoBuf, iLen );
1406 }
1407 
1408 
1409 #if !USE_WINDOWS
1410 
1411 #define SPH_BACKTRACE_ADDR_COUNT 128
1412 static void * g_pBacktraceAddresses [SPH_BACKTRACE_ADDR_COUNT];
1413 
sphBacktrace(int iFD,bool bSafe)1414 void sphBacktrace ( int iFD, bool bSafe )
1415 {
1416 	if ( iFD<0 )
1417 		return;
1418 
1419 	sphSafeInfo ( iFD, "-------------- backtrace begins here ---------------" );
1420 #ifdef COMPILER
1421 	sphSafeInfo ( iFD, "Program compiled with " COMPILER );
1422 #endif
1423 
1424 #ifdef OS_UNAME
1425 	sphSafeInfo ( iFD, "Host OS is "OS_UNAME );
1426 #endif
1427 
1428 	bool bOk = true;
1429 
1430 	void * pMyStack = NULL;
1431 	int iStackSize = 0;
1432 	if ( !bSafe )
1433 	{
1434 		pMyStack = sphMyStack();
1435 		iStackSize = g_iThreadStackSize;
1436 	}
1437 	sphSafeInfo ( iFD, "Stack bottom = 0x%p, thread stack size = 0x%x", pMyStack, iStackSize );
1438 
1439 	while ( pMyStack && !bSafe )
1440 	{
1441 		sphSafeInfo ( iFD, "begin of manual backtrace:" );
1442 		BYTE ** pFramePointer = NULL;
1443 
1444 		int iFrameCount = 0;
1445 		int iReturnFrameCount = sphIsLtLib() ? 2 : 1;
1446 
1447 #ifdef __i386__
1448 #define SIGRETURN_FRAME_OFFSET 17
1449 		__asm __volatile__ ( "movl %%ebp,%0":"=r"(pFramePointer):"r"(pFramePointer) );
1450 #endif
1451 
1452 #ifdef __x86_64__
1453 #define SIGRETURN_FRAME_OFFSET 23
1454 		__asm __volatile__ ( "movq %%rbp,%0":"=r"(pFramePointer):"r"(pFramePointer) );
1455 #endif
1456 
1457 #ifndef SIGRETURN_FRAME_OFFSET
1458 #define SIGRETURN_FRAME_OFFSET 0
1459 #endif
1460 
1461 		if ( !pFramePointer )
1462 		{
1463 			sphSafeInfo ( iFD, "Frame pointer is null, backtrace failed (did you build with -fomit-frame-pointer?)" );
1464 			break;
1465 		}
1466 
1467 		if ( !pMyStack || (BYTE*) pMyStack > (BYTE*) &pFramePointer )
1468 		{
1469 			int iRound = Min ( 65536, iStackSize );
1470 			pMyStack = (void *) ( ( (size_t) &pFramePointer + iRound ) & ~(size_t)65535 );
1471 			sphSafeInfo ( iFD, "Something wrong with thread stack, backtrace may be incorrect (fp=%p)", pFramePointer );
1472 
1473 			if ( pFramePointer > (BYTE**) pMyStack || pFramePointer < (BYTE**) pMyStack - iStackSize )
1474 			{
1475 				sphSafeInfo ( iFD, "Wrong stack limit or frame pointer, backtrace failed (fp=%p, stack=%p, stacksize=%d)", pFramePointer, pMyStack, iStackSize );
1476 				break;
1477 			}
1478 		}
1479 
1480 		sphSafeInfo ( iFD, "Stack looks OK, attempting backtrace." );
1481 
1482 		BYTE** pNewFP = NULL;
1483 		while ( pFramePointer < (BYTE**) pMyStack )
1484 		{
1485 			pNewFP = (BYTE**) *pFramePointer;
1486 			sphSafeInfo ( iFD, "%p", iFrameCount==iReturnFrameCount? *(pFramePointer + SIGRETURN_FRAME_OFFSET) : *(pFramePointer + 1) );
1487 
1488 			bOk = pNewFP > pFramePointer;
1489 			if ( !bOk ) break;
1490 
1491 			pFramePointer = pNewFP;
1492 			iFrameCount++;
1493 		}
1494 
1495 		if ( !bOk )
1496 			sphSafeInfo ( iFD, "Something wrong in frame pointers, backtrace failed (fp=%p)", pNewFP );
1497 
1498 		break;
1499 	}
1500 
1501 #if HAVE_BACKTRACE
1502 	sphSafeInfo ( iFD, "begin of system backtrace:" );
1503 	int iDepth = backtrace ( g_pBacktraceAddresses, SPH_BACKTRACE_ADDR_COUNT );
1504 #if HAVE_BACKTRACE_SYMBOLS
1505 	sphSafeInfo ( iFD, "begin of system symbols:" );
1506 	backtrace_symbols_fd ( g_pBacktraceAddresses, iDepth, iFD );
1507 #elif !HAVE_BACKTRACE_SYMBOLS
1508 	sphSafeInfo ( iFD, "begin of manual symbols:" );
1509 	for ( int i=0; i<Depth; i++ )
1510 		sphSafeInfo ( iFD, "%p", g_pBacktraceAddresses[i] );
1511 #endif // HAVE_BACKTRACE_SYMBOLS
1512 #endif // !HAVE_BACKTRACE
1513 
1514 	if ( bOk )
1515 		sphSafeInfo ( iFD, "Backtrace looks OK. Now you have to do following steps:\n"
1516 							"  1. Run the command over the crashed binary (for example, 'indexer'):\n"
1517 							"     nm -n indexer > indexer.sym\n"
1518 							"  2. Attach the binary, generated .sym and the text of backtrace (see above) to the bug report.\n"
1519 							"Also you can read the section about resolving backtraces in the documentation.");
1520 	sphSafeInfo ( iFD, "-------------- backtrace ends here ---------------" );
1521 }
1522 
1523 #else // USE_WINDOWS
1524 
sphBacktrace(EXCEPTION_POINTERS * pExc,const char * sFile)1525 void sphBacktrace ( EXCEPTION_POINTERS * pExc, const char * sFile )
1526 {
1527 	if ( !pExc || !sFile || !(*sFile) )
1528 	{
1529 		sphInfo ( "can't generate minidump" );
1530 		return;
1531 	}
1532 
1533 	HANDLE hFile = CreateFile ( sFile, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0 );
1534 	if ( hFile==INVALID_HANDLE_VALUE )
1535 	{
1536 		sphInfo ( "can't create minidump file '%s'", sFile );
1537 		return;
1538 	}
1539 
1540 	MINIDUMP_EXCEPTION_INFORMATION tExcInfo;
1541 	tExcInfo.ExceptionPointers = pExc;
1542 	tExcInfo.ClientPointers = FALSE;
1543 	tExcInfo.ThreadId = GetCurrentThreadId();
1544 
1545 	bool bDumped = ( MiniDumpWriteDump ( GetCurrentProcess(), GetCurrentProcessId(), hFile, MiniDumpNormal, &tExcInfo, 0, 0 )==TRUE );
1546 	CloseHandle ( hFile );
1547 
1548 	if ( !bDumped )
1549 		sphInfo ( "can't dump minidump" );
1550 }
1551 
1552 #endif // USE_WINDOWS
1553 
1554 
/// whether sphUnlinkIndex() removes the files when not forced to; enabled by default
static bool g_bUnlinkOld = true;

/// Change the flag that sphUnlinkIndex() checks before removing index files.
void sphSetUnlinkOld ( bool bUnlink )
{
	g_bUnlinkOld = bUnlink;
}
1560 
1561 
sphUnlinkIndex(const char * sName,bool bForce)1562 void sphUnlinkIndex ( const char * sName, bool bForce )
1563 {
1564 	if ( !( g_bUnlinkOld || bForce ) )
1565 		return;
1566 
1567 	// FIXME! ext list must be in sync with sphinx.cpp, searchd.cpp
1568 	const int EXT_COUNT = 9;
1569 	const char * dCurExts[EXT_COUNT] = { ".sph", ".spa", ".spi", ".spd", ".spp", ".spm", ".spk", ".sps", ".mvp" };
1570 	char sFileName[SPH_MAX_FILENAME_LEN];
1571 
1572 	for ( int j=0; j<EXT_COUNT; j++ )
1573 	{
1574 		snprintf ( sFileName, sizeof(sFileName), "%s%s", sName, dCurExts[j] );
1575 		// 'mvp' is optional file
1576 		if ( ::unlink ( sFileName ) && errno!=ENOENT )
1577 			sphWarning ( "unlink failed (file '%s', error '%s'", sFileName, strerror(errno) );
1578 	}
1579 }
1580 
1581 
1582 //
1583 // $Id: sphinxutils.cpp 4113 2013-08-26 07:43:28Z deogar $
1584 //
1585