1 //
2 // $Id: sphinxutils.cpp 4113 2013-08-26 07:43:28Z deogar $
3 //
4
5 //
6 // Copyright (c) 2001-2013, Andrew Aksyonoff
7 // Copyright (c) 2008-2013, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15
16 /// @file sphinxutils.cpp
17 /// Implementations for Sphinx utilities shared classes.
18
19 #include "sphinx.h"
20 #include "sphinxutils.h"
21 #include <ctype.h>
22 #include <fcntl.h>
23 #include <errno.h>
24 #if HAVE_EXECINFO_H
25 #include <execinfo.h>
26 #endif
27
28 #if USE_WINDOWS
29 #include <io.h> // for ::open on windows
30 #include <dbghelp.h>
31 #pragma comment(linker, "/defaultlib:dbghelp.lib")
32 #pragma message("Automatically linking with dbghelp.lib")
33 #else
34 #include <sys/wait.h>
35 #include <signal.h>
36 #endif
37
38 /////////////////////////////////////////////////////////////////////////////
39
/// Skip leading whitespace.
/// Returns a pointer into the same buffer, advanced past any leading
/// whitespace characters; the buffer itself is not modified.
static char * ltrim ( char * sLine )
{
	// isspace() requires a value representable as unsigned char (or EOF);
	// a plain char may be negative for non-ASCII bytes, hence the cast
	while ( *sLine && isspace ( (unsigned char)*sLine ) )
		sLine++;
	return sLine;
}
46
47
/// Strip trailing whitespace in place.
/// Writes a terminating zero right after the last non-whitespace character
/// and returns the original pointer.
static char * rtrim ( char * sLine )
{
	// walk back by length rather than by pointer, so that an empty string
	// never produces a pointer before the start of the buffer
	size_t iLen = strlen ( sLine );

	// cast is needed because isspace() on a negative char is undefined
	while ( iLen>0 && isspace ( (unsigned char)sLine[iLen-1] ) )
		iLen--;

	sLine[iLen] = '\0';
	return sLine;
}
56
57
trim(char * sLine)58 static char * trim ( char * sLine )
59 {
60 return ltrim ( rtrim ( sLine ) );
61 }
62
63 //////////////////////////////////////////////////////////////////////////
64
GetSize(const char * sKey,int iDefault) const65 int CSphConfigSection::GetSize ( const char * sKey, int iDefault ) const
66 {
67 CSphVariant * pEntry = (*this)( sKey );
68 if ( !pEntry )
69 return iDefault;
70
71 char sMemLimit[256];
72 strncpy ( sMemLimit, pEntry->cstr(), sizeof(sMemLimit) );
73 sMemLimit [ sizeof(sMemLimit)-1 ] = '\0';
74
75 int iLen = strlen ( sMemLimit );
76 if ( !iLen )
77 return iDefault;
78
79 iLen--;
80 int iScale = 1;
81 if ( toupper ( sMemLimit[iLen] )=='K' )
82 {
83 iScale = 1024;
84 sMemLimit[iLen] = '\0';
85
86 } else if ( toupper ( sMemLimit[iLen] )=='M' )
87 {
88 iScale = 1048576;
89 sMemLimit[iLen] = '\0';
90 }
91
92 char * sErr;
93 int64_t iRes = strtoll ( sMemLimit, &sErr, 10 );
94
95 if ( !*sErr )
96 {
97 iRes *= iScale;
98 if ( iRes>INT_MAX )
99 {
100 sphWarning ( "'%s = %s' clamped to INT_MAX", sKey, pEntry->cstr() );
101 iRes = INT_MAX;
102 }
103 } else
104 {
105 sphWarning ( "'%s = %s' parse error '%s'", sKey, pEntry->cstr(), sErr );
106 iRes = iDefault;
107 }
108
109 return (int)iRes;
110 }
111
112 //////////////////////////////////////////////////////////////////////////
113 // CONFIG PARSER
114 //////////////////////////////////////////////////////////////////////////
115
/// per-key flags (bitmask, stored in KeyDesc_t::m_iFlags)
enum
{
	KEY_DEPRECATED	= 1UL<<0,	///< key is still accepted, but triggers a deprecation warning
	KEY_LIST		= 1UL<<1	///< key may be specified several times in one section
};
122
/// descriptor of one allowed config key, used to validate config files
struct KeyDesc_t
{
	const char *	m_sKey;		///< key name; NULL in the entry that terminates a table
	int				m_iFlags;	///< combination of KEY_xxx flags
	const char *	m_sExtra;	///< extra info; for now, what to use instead of a deprecated key
};
130
131 /// allowed keys for source section
132 static KeyDesc_t g_dKeysSource[] =
133 {
134 { "type", 0, NULL },
135 { "strip_html", KEY_DEPRECATED, "html_strip (per-index)" },
136 { "index_html_attrs", KEY_DEPRECATED, "html_index_attrs (per-index)" },
137 { "sql_host", 0, NULL },
138 { "sql_user", 0, NULL },
139 { "sql_pass", 0, NULL },
140 { "sql_db", 0, NULL },
141 { "sql_port", 0, NULL },
142 { "sql_sock", 0, NULL },
143 { "mysql_connect_flags", 0, NULL },
144 { "mysql_ssl_key", 0, NULL },
145 { "mysql_ssl_cert", 0, NULL },
146 { "mysql_ssl_ca", 0, NULL },
147 { "mssql_winauth", 0, NULL },
148 { "mssql_unicode", 0, NULL },
149 { "sql_query_pre", KEY_LIST, NULL },
150 { "sql_query", 0, NULL },
151 { "sql_query_range", 0, NULL },
152 { "sql_range_step", 0, NULL },
153 { "sql_query_killlist", 0, NULL },
154 { "sql_attr_uint", KEY_LIST, NULL },
155 { "sql_attr_bool", KEY_LIST, NULL },
156 { "sql_attr_timestamp", KEY_LIST, NULL },
157 { "sql_attr_str2ordinal", KEY_LIST, NULL },
158 { "sql_attr_float", KEY_LIST, NULL },
159 { "sql_attr_bigint", KEY_LIST, NULL },
160 { "sql_attr_multi", KEY_LIST, NULL },
161 { "sql_query_post", KEY_LIST, NULL },
162 { "sql_query_post_index", KEY_LIST, NULL },
163 { "sql_ranged_throttle", 0, NULL },
164 { "sql_query_info", 0, NULL },
165 { "xmlpipe_command", 0, NULL },
166 { "xmlpipe_field", KEY_LIST, NULL },
167 { "xmlpipe_attr_uint", KEY_LIST, NULL },
168 { "xmlpipe_attr_timestamp", KEY_LIST, NULL },
169 { "xmlpipe_attr_str2ordinal", KEY_LIST, NULL },
170 { "xmlpipe_attr_bool", KEY_LIST, NULL },
171 { "xmlpipe_attr_float", KEY_LIST, NULL },
172 { "xmlpipe_attr_bigint", KEY_LIST, NULL },
173 { "xmlpipe_attr_multi", KEY_LIST, NULL },
174 { "xmlpipe_attr_multi_64", KEY_LIST, NULL },
175 { "xmlpipe_attr_string", KEY_LIST, NULL },
176 { "xmlpipe_attr_wordcount", KEY_LIST, NULL },
177 { "xmlpipe_field_string", KEY_LIST, NULL },
178 { "xmlpipe_field_wordcount", KEY_LIST, NULL },
179 { "xmlpipe_fixup_utf8", 0, NULL },
180 { "sql_group_column", KEY_LIST | KEY_DEPRECATED, "sql_attr_uint" },
181 { "sql_date_column", KEY_LIST | KEY_DEPRECATED, "sql_attr_timestamp" },
182 { "sql_str2ordinal_column", KEY_LIST | KEY_DEPRECATED, "sql_attr_str2ordinal" },
183 { "unpack_zlib", KEY_LIST, NULL },
184 { "unpack_mysqlcompress", KEY_LIST, NULL },
185 { "unpack_mysqlcompress_maxsize", 0, NULL },
186 { "odbc_dsn", 0, NULL },
187 { "sql_joined_field", KEY_LIST, NULL },
188 { "sql_attr_string", KEY_LIST, NULL },
189 { "sql_attr_str2wordcount", KEY_LIST, NULL },
190 { "sql_field_string", KEY_LIST, NULL },
191 { "sql_field_str2wordcount", KEY_LIST, NULL },
192 { "sql_file_field", KEY_LIST, NULL },
193 { "sql_column_buffers", 0, NULL },
194 { NULL, 0, NULL }
195 };
196
197 /// allowed keys for index section
198 static KeyDesc_t g_dKeysIndex[] =
199 {
200 { "source", KEY_LIST, NULL },
201 { "path", 0, NULL },
202 { "docinfo", 0, NULL },
203 { "mlock", 0, NULL },
204 { "morphology", 0, NULL },
205 { "stopwords", 0, NULL },
206 { "synonyms", KEY_DEPRECATED, "exceptions" },
207 { "exceptions", 0, NULL },
208 { "wordforms", 0, NULL },
209 { "min_word_len", 0, NULL },
210 { "charset_type", 0, NULL },
211 { "charset_table", 0, NULL },
212 { "ignore_chars", 0, NULL },
213 { "min_prefix_len", 0, NULL },
214 { "min_infix_len", 0, NULL },
215 { "prefix_fields", 0, NULL },
216 { "infix_fields", 0, NULL },
217 { "enable_star", 0, NULL },
218 { "ngram_len", 0, NULL },
219 { "ngram_chars", 0, NULL },
220 { "phrase_boundary", 0, NULL },
221 { "phrase_boundary_step", 0, NULL },
222 { "ondisk_dict", 0, NULL },
223 { "type", 0, NULL },
224 { "local", KEY_LIST, NULL },
225 { "agent", KEY_LIST, NULL },
226 { "agent_blackhole", KEY_LIST, NULL },
227 { "agent_connect_timeout", 0, NULL },
228 { "agent_query_timeout", 0, NULL },
229 { "html_strip", 0, NULL },
230 { "html_index_attrs", 0, NULL },
231 { "html_remove_elements", 0, NULL },
232 { "preopen", 0, NULL },
233 { "inplace_enable", 0, NULL },
234 { "inplace_hit_gap", 0, NULL },
235 { "inplace_docinfo_gap", 0, NULL },
236 { "inplace_reloc_factor", 0, NULL },
237 { "inplace_write_factor", 0, NULL },
238 { "index_exact_words", 0, NULL },
239 { "min_stemming_len", 0, NULL },
240 { "overshort_step", 0, NULL },
241 { "stopword_step", 0, NULL },
242 { "blend_chars", 0, NULL },
243 { "expand_keywords", 0, NULL },
244 { "hitless_words", 0, NULL },
245 { "hit_format", 0, NULL },
246 { "rt_field", KEY_LIST, NULL },
247 { "rt_attr_uint", KEY_LIST, NULL },
248 { "rt_attr_bigint", KEY_LIST, NULL },
249 { "rt_attr_float", KEY_LIST, NULL },
250 { "rt_attr_timestamp", KEY_LIST, NULL },
251 { "rt_attr_string", KEY_LIST, NULL },
252 { "rt_attr_multi", KEY_LIST, NULL },
253 { "rt_attr_multi_64", KEY_LIST, NULL },
254 { "rt_mem_limit", 0, NULL },
255 { "dict", 0, NULL },
256 { "index_sp", 0, NULL },
257 { "index_zones", 0, NULL },
258 { "blend_mode", 0, NULL },
259 { NULL, 0, NULL }
260 };
261
262 /// allowed keys for indexer section
263 static KeyDesc_t g_dKeysIndexer[] =
264 {
265 { "mem_limit", 0, NULL },
266 { "max_iops", 0, NULL },
267 { "max_iosize", 0, NULL },
268 { "max_xmlpipe2_field", 0, NULL },
269 { "max_file_field_buffer", 0, NULL },
270 { "write_buffer", 0, NULL },
271 { "on_file_field_error", 0, NULL },
272 { NULL, 0, NULL }
273 };
274
275 /// allowed keys for searchd section
276 static KeyDesc_t g_dKeysSearchd[] =
277 {
278 { "address", KEY_DEPRECATED, "listen" },
279 { "port", 0, NULL },
280 { "listen", KEY_LIST, NULL },
281 { "log", 0, NULL },
282 { "query_log", 0, NULL },
283 { "read_timeout", 0, NULL },
284 { "client_timeout", 0, NULL },
285 { "max_children", 0, NULL },
286 { "pid_file", 0, NULL },
287 { "max_matches", 0, NULL },
288 { "seamless_rotate", 0, NULL },
289 { "preopen_indexes", 0, NULL },
290 { "unlink_old", 0, NULL },
291 { "ondisk_dict_default", 0, NULL },
292 { "attr_flush_period", 0, NULL },
293 { "max_packet_size", 0, NULL },
294 { "mva_updates_pool", 0, NULL },
295 { "crash_log_path", KEY_DEPRECATED, NULL },
296 { "max_filters", 0, NULL },
297 { "max_filter_values", 0, NULL },
298 { "listen_backlog", 0, NULL },
299 { "read_buffer", 0, NULL },
300 { "read_unhinted", 0, NULL },
301 { "max_batch_queries", 0, NULL },
302 { "subtree_docs_cache", 0, NULL },
303 { "subtree_hits_cache", 0, NULL },
304 { "workers", 0, NULL },
305 { "prefork", 0, NULL },
306 { "dist_threads", 0, NULL },
307 { "binlog_flush", 0, NULL },
308 { "binlog_path", 0, NULL },
309 { "binlog_max_log_size", 0, NULL },
310 { "thread_stack", 0, NULL },
311 { "expansion_limit", 0, NULL },
312 { "compat_sphinxql_magics", 0, NULL },
313 { "rt_flush_period", 0, NULL },
314 { "query_log_format", 0, NULL },
315 { "mysql_version_string", 0, NULL },
316 { "plugin_dir", 0, NULL },
317 { "collation_server", 0, NULL },
318 { "collation_libc_locale", 0, NULL },
319 { "watchdog", 0, NULL },
320 { "prefork_rotation_throttle", 0, NULL },
321 { NULL, 0, NULL }
322 };
323
324 //////////////////////////////////////////////////////////////////////////
325
CSphConfigParser()326 CSphConfigParser::CSphConfigParser ()
327 : m_sFileName ( "" )
328 , m_iLine ( -1 )
329 {
330 }
331
332
IsPlainSection(const char * sKey)333 bool CSphConfigParser::IsPlainSection ( const char * sKey )
334 {
335 if ( !strcasecmp ( sKey, "indexer" ) ) return true;
336 if ( !strcasecmp ( sKey, "searchd" ) ) return true;
337 if ( !strcasecmp ( sKey, "search" ) ) return true;
338 return false;
339 }
340
341
IsNamedSection(const char * sKey)342 bool CSphConfigParser::IsNamedSection ( const char * sKey )
343 {
344 if ( !strcasecmp ( sKey, "source" ) ) return true;
345 if ( !strcasecmp ( sKey, "index" ) ) return true;
346 return false;
347 }
348
349
AddSection(const char * sType,const char * sName)350 bool CSphConfigParser::AddSection ( const char * sType, const char * sName )
351 {
352 m_sSectionType = sType;
353 m_sSectionName = sName;
354
355 if ( !m_tConf.Exists ( m_sSectionType ) )
356 m_tConf.Add ( CSphConfigType(), m_sSectionType ); // FIXME! be paranoid, verify that it returned true
357
358 if ( m_tConf[m_sSectionType].Exists ( m_sSectionName ) )
359 {
360 snprintf ( m_sError, sizeof(m_sError), "section '%s' (type='%s') already exists", sName, sType );
361 return false;
362 }
363 m_tConf[m_sSectionType].Add ( CSphConfigSection(), m_sSectionName ); // FIXME! be paranoid, verify that it returned true
364
365 return true;
366 }
367
368
AddKey(const char * sKey,char * sValue)369 void CSphConfigParser::AddKey ( const char * sKey, char * sValue )
370 {
371 assert ( m_tConf.Exists ( m_sSectionType ) );
372 assert ( m_tConf[m_sSectionType].Exists ( m_sSectionName ) );
373
374 sValue = trim ( sValue );
375 CSphConfigSection & tSec = m_tConf[m_sSectionType][m_sSectionName];
376 if ( tSec(sKey) )
377 {
378 if ( tSec[sKey].m_bTag )
379 {
380 // override value or list with a new value
381 SafeDelete ( tSec[sKey].m_pNext ); // only leave the first array element
382 tSec[sKey] = sValue; // update its value
383 tSec[sKey].m_bTag = false; // mark it as overridden
384
385 } else
386 {
387 // chain to tail, to keep the order
388 CSphVariant * pTail = &tSec[sKey];
389 while ( pTail->m_pNext )
390 pTail = pTail->m_pNext;
391 pTail->m_pNext = new CSphVariant ( sValue );
392 }
393
394 } else
395 {
396 // just add
397 tSec.Add ( sValue, sKey ); // FIXME! be paranoid, verify that it returned true
398 }
399 }
400
401
ValidateKey(const char * sKey)402 bool CSphConfigParser::ValidateKey ( const char * sKey )
403 {
404 // get proper descriptor table
405 // OPTIMIZE! move lookup to AddSection
406 const KeyDesc_t * pDesc = NULL;
407 if ( m_sSectionType=="source" ) pDesc = g_dKeysSource;
408 else if ( m_sSectionType=="index" ) pDesc = g_dKeysIndex;
409 else if ( m_sSectionType=="indexer" ) pDesc = g_dKeysIndexer;
410 else if ( m_sSectionType=="searchd" ) pDesc = g_dKeysSearchd;
411 if ( !pDesc )
412 {
413 snprintf ( m_sError, sizeof(m_sError), "unknown section type '%s'", m_sSectionType.cstr() );
414 return false;
415 }
416
417 // check if the key is known
418 while ( pDesc->m_sKey && strcasecmp ( pDesc->m_sKey, sKey ) )
419 pDesc++;
420 if ( !pDesc->m_sKey )
421 {
422 snprintf ( m_sError, sizeof(m_sError), "unknown key name '%s'", sKey );
423 return false;
424 }
425
426 // warn about deprecate keys
427 if ( pDesc->m_iFlags & KEY_DEPRECATED )
428 if ( ++m_iWarnings<=WARNS_THRESH )
429 fprintf ( stdout, "WARNING: key '%s' is deprecated in %s line %d; use '%s' instead.\n", sKey, m_sFileName.cstr(), m_iLine, pDesc->m_sExtra );
430
431 // warn about list/non-list keys
432 if (!( pDesc->m_iFlags & KEY_LIST ))
433 {
434 CSphConfigSection & tSec = m_tConf[m_sSectionType][m_sSectionName];
435 if ( tSec(sKey) && !tSec[sKey].m_bTag )
436 if ( ++m_iWarnings<=WARNS_THRESH )
437 fprintf ( stdout, "WARNING: key '%s' is not multi-value; value in %s line %d will be ignored.\n", sKey, m_sFileName.cstr(), m_iLine );
438 }
439
440 return true;
441 }
442
443 #if !USE_WINDOWS
444
TryToExec(char * pBuffer,const char * szFilename,CSphVector<char> & dResult,char * sError,int iErrorLen)445 bool TryToExec ( char * pBuffer, const char * szFilename, CSphVector<char> & dResult, char * sError, int iErrorLen )
446 {
447 int dPipe[2] = { -1, -1 };
448
449 if ( pipe ( dPipe ) )
450 {
451 snprintf ( sError, iErrorLen, "pipe() failed (error=%s)", strerror(errno) );
452 return false;
453 }
454
455 pBuffer = trim ( pBuffer );
456
457 int iRead = dPipe[0];
458 int iWrite = dPipe[1];
459
460 int iChild = fork();
461
462 if ( iChild==0 )
463 {
464 close ( iRead );
465 close ( STDOUT_FILENO );
466 dup2 ( iWrite, STDOUT_FILENO );
467
468 char * pPtr = pBuffer;
469 char * pArgs = NULL;
470 while ( *pPtr )
471 {
472 if ( sphIsSpace ( *pPtr ) )
473 {
474 *pPtr = '\0';
475 pArgs = trim ( pPtr+1 );
476 break;
477 }
478
479 pPtr++;
480 }
481
482 if ( pArgs )
483 execl ( pBuffer, pBuffer, pArgs, szFilename, (char*)NULL );
484 else
485 execl ( pBuffer, pBuffer, szFilename, (char*)NULL );
486
487 exit ( 1 );
488
489 } else if ( iChild==-1 )
490 {
491 snprintf ( sError, iErrorLen, "fork failed: [%d] %s", errno, strerror(errno) );
492 return false;
493 }
494
495 close ( iWrite );
496
497 int iBytesRead, iTotalRead = 0;
498 const int BUFFER_SIZE = 65536;
499
500 dResult.Reset ();
501
502 do
503 {
504 dResult.Resize ( iTotalRead + BUFFER_SIZE );
505 for ( ;; )
506 {
507 iBytesRead = read ( iRead, (void*)&(dResult [iTotalRead]), BUFFER_SIZE );
508 if ( iBytesRead==-1 && errno==EINTR ) // we can get SIGCHLD just before eof
509 continue;
510 break;
511 }
512 iTotalRead += iBytesRead;
513 }
514 while ( iBytesRead > 0 );
515 close ( iRead );
516
517 int iStatus, iResult;
518 do
519 {
520 // can be interrupted by pretty much anything (e.g. SIGCHLD from other searchd children)
521 iResult = waitpid ( iChild, &iStatus, 0 );
522
523 // they say this can happen if child exited and SIGCHLD was ignored
524 // a cleaner one would be to temporary handle it here, but can we be bothered
525 if ( iResult==-1 && errno==ECHILD )
526 {
527 iResult = iChild;
528 iStatus = 0;
529 }
530
531 if ( iResult==-1 && errno!=EINTR )
532 {
533 snprintf ( sError, iErrorLen, "waitpid() failed: [%d] %s", errno, strerror(errno) );
534 return false;
535 }
536 }
537 while ( iResult!=iChild );
538
539 if ( WIFEXITED ( iStatus ) && WEXITSTATUS ( iStatus ) )
540 {
541 // FIXME? read stderr and log that too
542 snprintf ( sError, iErrorLen, "error executing '%s' status = %d", pBuffer, WEXITSTATUS ( iStatus ) );
543 return false;
544 }
545
546 if ( WIFSIGNALED ( iStatus ) )
547 {
548 snprintf ( sError, iErrorLen, "error executing '%s', killed by signal %d", pBuffer, WTERMSIG ( iStatus ) );
549 return false;
550 }
551
552 if ( iBytesRead < 0 )
553 {
554 snprintf ( sError, iErrorLen, "pipe read error: [%d] %s", errno, strerror(errno) );
555 return false;
556 }
557
558 dResult.Resize ( iTotalRead + 1 );
559 dResult [iTotalRead] = '\0';
560
561 return true;
562 }
563
TryToExec(char * pBuffer,const char * szFilename,CSphVector<char> & dResult)564 bool CSphConfigParser::TryToExec ( char * pBuffer, const char * szFilename, CSphVector<char> & dResult )
565 {
566 return ::TryToExec ( pBuffer, szFilename, dResult, m_sError, sizeof(m_sError) );
567 }
568 #endif
569
570
GetBufferString(char * szDest,int iMax,const char * & szSource)571 char * CSphConfigParser::GetBufferString ( char * szDest, int iMax, const char * & szSource )
572 {
573 int nCopied = 0;
574
575 while ( nCopied < iMax-1 && szSource[nCopied] && ( nCopied==0 || szSource[nCopied-1]!='\n' ) )
576 {
577 szDest [nCopied] = szSource [nCopied];
578 nCopied++;
579 }
580
581 if ( !nCopied )
582 return NULL;
583
584 szSource += nCopied;
585 szDest [nCopied] = '\0';
586
587 return szDest;
588 }
589
ReParse(const char * sFileName,const char * pBuffer)590 bool CSphConfigParser::ReParse ( const char * sFileName, const char * pBuffer )
591 {
592 CSphConfig tOldConfig = m_tConf;
593 m_tConf.Reset();
594 if ( Parse ( sFileName, pBuffer ) )
595 return true;
596 m_tConf = tOldConfig;
597 return false;
598 }
599
Parse(const char * sFileName,const char * pBuffer)600 bool CSphConfigParser::Parse ( const char * sFileName, const char * pBuffer )
601 {
602 const int L_STEPBACK = 16;
603 const int L_TOKEN = 64;
604 const int L_BUFFER = 8192;
605
606 FILE * fp = NULL;
607 if ( !pBuffer )
608 {
609 // open file
610 fp = fopen ( sFileName, "rb" );
611 if ( !fp )
612 return false;
613 }
614
615 // init parser
616 m_sFileName = sFileName;
617 m_iLine = 0;
618 m_iWarnings = 0;
619
620 char * p = NULL;
621 char * pEnd = NULL;
622
623 char sBuf [ L_BUFFER ] = { 0 };
624
625 char sToken [ L_TOKEN ] = { 0 };
626 int iToken = 0;
627 int iCh = -1;
628
629 enum { S_TOP, S_SKIP2NL, S_TOK, S_TYPE, S_SEC, S_CHR, S_VALUE, S_SECNAME, S_SECBASE, S_KEY } eState = S_TOP, eStack[8];
630 int iStack = 0;
631
632 int iValue = 0, iValueMax = 65535;
633 char * sValue = new char [ iValueMax+1 ];
634
635 #define LOC_ERROR(_msg) { strncpy ( m_sError, _msg, sizeof(m_sError) ); break; }
636 #define LOC_ERROR2(_msg,_a) { snprintf ( m_sError, sizeof(m_sError), _msg, _a ); break; }
637 #define LOC_ERROR3(_msg,_a,_b) { snprintf ( m_sError, sizeof(m_sError), _msg, _a, _b ); break; }
638 #define LOC_ERROR4(_msg,_a,_b,_c) { snprintf ( m_sError, sizeof(m_sError), _msg, _a, _b, _c ); break; }
639
640 #define LOC_PUSH(_new) { assert ( iStack<int(sizeof(eStack)/sizeof(eState)) ); eStack[iStack++] = eState; eState = _new; }
641 #define LOC_POP() { assert ( iStack>0 ); eState = eStack[--iStack]; }
642 #define LOC_BACK() { p--; }
643
644 m_sError[0] = '\0';
645
646 for ( ; ; p++ )
647 {
648 // if this line is over, load next line
649 if ( p>=pEnd )
650 {
651 char * szResult = pBuffer ? GetBufferString ( sBuf, L_BUFFER, pBuffer ) : fgets ( sBuf, L_BUFFER, fp );
652 if ( !szResult )
653 break; // FIXME! check for read error
654
655 m_iLine++;
656 int iLen = strlen(sBuf);
657 if ( iLen<=0 )
658 LOC_ERROR ( "internal error; fgets() returned empty string" );
659
660 p = sBuf;
661 pEnd = sBuf + iLen;
662 if ( pEnd[-1]!='\n' )
663 {
664 if ( iLen==L_BUFFER-1 )
665 LOC_ERROR ( "line too long" );
666 }
667 }
668
669 // handle S_TOP state
670 if ( eState==S_TOP )
671 {
672 if ( isspace(*p) ) continue;
673
674 if ( *p=='#' )
675 {
676 #if !USE_WINDOWS
677 if ( !pBuffer && m_iLine==1 && p==sBuf && p[1]=='!' )
678 {
679 CSphVector<char> dResult;
680 if ( TryToExec ( p+2, sFileName, dResult ) )
681 Parse ( sFileName, &dResult[0] );
682 break;
683 } else
684 #endif
685 {
686 LOC_PUSH ( S_SKIP2NL );
687 continue;
688 }
689 }
690
691 if ( !sphIsAlpha(*p) ) LOC_ERROR ( "invalid token" );
692 iToken = 0; LOC_PUSH ( S_TYPE ); LOC_PUSH ( S_TOK ); LOC_BACK(); continue;
693 }
694
695 // handle S_SKIP2NL state
696 if ( eState==S_SKIP2NL )
697 {
698 LOC_POP ();
699 p = pEnd;
700 continue;
701 }
702
703 // handle S_TOK state
704 if ( eState==S_TOK )
705 {
706 if ( !iToken && !sphIsAlpha(*p) )LOC_ERROR ( "internal error (non-alpha in S_TOK pos 0)" );
707 if ( iToken==sizeof(sToken) ) LOC_ERROR ( "token too long" );
708 if ( !sphIsAlpha(*p) ) { LOC_POP (); sToken [ iToken ] = '\0'; iToken = 0; LOC_BACK(); continue; }
709 if ( !iToken ) { sToken[0] = '\0'; }
710 sToken [ iToken++ ] = *p; continue;
711 }
712
713 // handle S_TYPE state
714 if ( eState==S_TYPE )
715 {
716 if ( isspace(*p) ) continue;
717 if ( *p=='#' ) { LOC_PUSH ( S_SKIP2NL ); continue; }
718 if ( !sToken[0] ) { LOC_ERROR ( "internal error (empty token in S_TYPE)" ); }
719 if ( IsPlainSection(sToken) ) { if ( !AddSection ( sToken, sToken ) ) break; sToken[0] = '\0'; LOC_POP (); LOC_PUSH ( S_SEC ); LOC_PUSH ( S_CHR ); iCh = '{'; LOC_BACK(); continue; }
720 if ( IsNamedSection(sToken) ) { m_sSectionType = sToken; sToken[0] = '\0'; LOC_POP (); LOC_PUSH ( S_SECNAME ); LOC_BACK(); continue; }
721 LOC_ERROR2 ( "invalid section type '%s'", sToken );
722 }
723
724 // handle S_CHR state
725 if ( eState==S_CHR )
726 {
727 if ( isspace(*p) ) continue;
728 if ( *p=='#' ) { LOC_PUSH ( S_SKIP2NL ); continue; }
729 if ( *p!=iCh ) LOC_ERROR3 ( "expected '%c', got '%c'", iCh, *p );
730 LOC_POP (); continue;
731 }
732
733 // handle S_SEC state
734 if ( eState==S_SEC )
735 {
736 if ( isspace(*p) ) continue;
737 if ( *p=='#' ) { LOC_PUSH ( S_SKIP2NL ); continue; }
738 if ( *p=='}' ) { LOC_POP (); continue; }
739 if ( sphIsAlpha(*p) ) { LOC_PUSH ( S_KEY ); LOC_PUSH ( S_TOK ); LOC_BACK(); iValue = 0; sValue[0] = '\0'; continue; }
740 LOC_ERROR2 ( "section contents: expected token, got '%c'", *p );
741 }
742
743 // handle S_KEY state
744 if ( eState==S_KEY )
745 {
746 // validate the key
747 if ( !ValidateKey ( sToken ) )
748 break;
749
750 // an assignment operator and a value must follow
751 LOC_POP (); LOC_PUSH ( S_VALUE ); LOC_PUSH ( S_CHR ); iCh = '=';
752 LOC_BACK(); // because we did not work the char at all
753 continue;
754 }
755
756 // handle S_VALUE state
757 if ( eState==S_VALUE )
758 {
759 if ( *p=='\n' ) { AddKey ( sToken, sValue ); iValue = 0; LOC_POP (); continue; }
760 if ( *p=='#' ) { AddKey ( sToken, sValue ); iValue = 0; LOC_POP (); LOC_PUSH ( S_SKIP2NL ); continue; }
761 if ( *p=='\\' )
762 {
763 // backslash at the line end: continuation operator; let the newline be unhanlded
764 if ( p[1]=='\r' || p[1]=='\n' ) { LOC_PUSH ( S_SKIP2NL ); continue; }
765
766 // backslash before number sign: comment start char escaping; advance and pass it
767 if ( p[1]=='#' ) { p++; }
768
769 // otherwise: just a char, pass it
770 }
771 if ( iValue<iValueMax ) { sValue[iValue++] = *p; sValue[iValue] = '\0'; }
772 continue;
773 }
774
775 // handle S_SECNAME state
776 if ( eState==S_SECNAME )
777 {
778 if ( isspace(*p) ) { continue; }
779 if ( !sToken[0]&&!sphIsAlpha(*p)) { LOC_ERROR2 ( "named section: expected name, got '%c'", *p ); }
780
781 if ( !sToken[0] ) { LOC_PUSH ( S_TOK ); LOC_BACK(); continue; }
782 if ( !AddSection ( m_sSectionType.cstr(), sToken ) ) break; sToken[0] = '\0';
783 if ( *p==':' ) { eState = S_SECBASE; continue; }
784 if ( *p=='{' ) { eState = S_SEC; continue; }
785 LOC_ERROR2 ( "named section: expected ':' or '{', got '%c'", *p );
786 }
787
788 // handle S_SECBASE state
789 if ( eState==S_SECBASE )
790 {
791 if ( isspace(*p) ) { continue; }
792 if ( !sToken[0]&&!sphIsAlpha(*p)) { LOC_ERROR2 ( "named section: expected parent name, got '%c'", *p ); }
793 if ( !sToken[0] ) { LOC_PUSH ( S_TOK ); LOC_BACK(); continue; }
794
795 // copy the section
796 assert ( m_tConf.Exists ( m_sSectionType ) );
797
798 if ( !m_tConf [ m_sSectionType ].Exists ( sToken ) )
799 LOC_ERROR4 ( "inherited section '%s': parent doesn't exist (parent name='%s', type='%s')",
800 m_sSectionName.cstr(), sToken, m_sSectionType.cstr() );
801
802 CSphConfigSection & tDest = m_tConf [ m_sSectionType ][ m_sSectionName ];
803 tDest = m_tConf [ m_sSectionType ][ sToken ];
804
805 // mark all values in the target section as "to be overridden"
806 tDest.IterateStart ();
807 while ( tDest.IterateNext() )
808 tDest.IterateGet().m_bTag = true;
809
810 LOC_BACK();
811 eState = S_SEC;
812 LOC_PUSH ( S_CHR );
813 iCh = '{';
814 continue;
815 }
816
817 LOC_ERROR2 ( "internal error (unhandled state %d)", eState );
818 }
819
820 #undef LOC_POP
821 #undef LOC_PUSH
822 #undef LOC_ERROR
823
824 if ( !pBuffer )
825 fclose ( fp );
826
827 SafeDeleteArray ( sValue );
828
829 if ( m_iWarnings>WARNS_THRESH )
830 fprintf ( stdout, "WARNING: %d more warnings skipped.\n", m_iWarnings-WARNS_THRESH );
831
832 if ( strlen(m_sError) )
833 {
834 int iCol = (int)(p-sBuf+1);
835
836 int iCtx = Min ( L_STEPBACK, iCol ); // error context is upto L_STEPBACK chars back, but never going to prev line
837 const char * sCtx = p-iCtx+1;
838 if ( sCtx<sBuf )
839 sCtx = sBuf;
840
841 char sStepback [ L_STEPBACK+1 ];
842 memcpy ( sStepback, sCtx, iCtx );
843 sStepback[iCtx] = '\0';
844
845 fprintf ( stdout, "ERROR: %s in %s line %d col %d.\n", m_sError, m_sFileName.cstr(), m_iLine, iCol );
846 return false;
847 }
848
849 return true;
850 }
851
852 /////////////////////////////////////////////////////////////////////////////
853
sphConfTokenizer(const CSphConfigSection & hIndex,CSphTokenizerSettings & tSettings,CSphString & sError)854 bool sphConfTokenizer ( const CSphConfigSection & hIndex, CSphTokenizerSettings & tSettings, CSphString & sError )
855 {
856 tSettings.m_iNgramLen = Max ( hIndex.GetInt ( "ngram_len" ), 0 );
857
858 if ( !hIndex("charset_type") || hIndex["charset_type"]=="sbcs" )
859 {
860 tSettings.m_iType = TOKENIZER_SBCS;
861
862 } else if ( hIndex["charset_type"]=="utf-8" )
863 {
864 tSettings.m_iType = TOKENIZER_UTF8;
865 if ( hIndex ( "ngram_chars" ) )
866 {
867 if ( tSettings.m_iNgramLen )
868 tSettings.m_iType = TOKENIZER_NGRAM;
869 else
870 sphWarning ( "ngram_chars specified, but ngram_len=0; IGNORED" );
871 }
872
873 } else
874 {
875 sError.SetSprintf ( "unknown charset type '%s'", hIndex["charset_type"].cstr() );
876 return false;
877 }
878
879 tSettings.m_sCaseFolding = hIndex.GetStr ( "charset_table" );
880 tSettings.m_iMinWordLen = Max ( hIndex.GetInt ( "min_word_len" ), 0 );
881 tSettings.m_sNgramChars = hIndex.GetStr ( "ngram_chars" );
882 tSettings.m_sSynonymsFile = hIndex.GetStr ( "exceptions" ); // new option name
883 if ( tSettings.m_sSynonymsFile.IsEmpty() )
884 tSettings.m_sSynonymsFile = hIndex.GetStr ( "synonyms" ); // deprecated option name
885 tSettings.m_sIgnoreChars = hIndex.GetStr ( "ignore_chars" );
886 tSettings.m_sBlendChars = hIndex.GetStr ( "blend_chars" );
887 tSettings.m_sBlendMode = hIndex.GetStr ( "blend_mode" );
888
889 // phrase boundaries
890 int iBoundaryStep = Max ( hIndex.GetInt ( "phrase_boundary_step" ), -1 );
891 if ( iBoundaryStep!=0 )
892 tSettings.m_sBoundary = hIndex.GetStr ( "phrase_boundary" );
893
894 return true;
895 }
896
sphConfDictionary(const CSphConfigSection & hIndex,CSphDictSettings & tSettings)897 void sphConfDictionary ( const CSphConfigSection & hIndex, CSphDictSettings & tSettings )
898 {
899 tSettings.m_sMorphology = hIndex.GetStr ( "morphology" );
900 tSettings.m_sStopwords = hIndex.GetStr ( "stopwords" );
901 tSettings.m_sWordforms = hIndex.GetStr ( "wordforms" );
902 tSettings.m_iMinStemmingLen = hIndex.GetInt ( "min_stemming_len", 1 );
903
904 if ( hIndex("dict") )
905 {
906 tSettings.m_bWordDict = false; // default to crc
907 if ( hIndex["dict"]=="keywords" )
908 tSettings.m_bWordDict = true;
909 else if ( hIndex["dict"]!="crc" )
910 fprintf ( stdout, "WARNING: unknown dict=%s, defaulting to crc\n", hIndex["dict"].cstr() );
911 }
912 }
913
914
sphConfIndex(const CSphConfigSection & hIndex,CSphIndexSettings & tSettings,CSphString & sError)915 bool sphConfIndex ( const CSphConfigSection & hIndex, CSphIndexSettings & tSettings, CSphString & sError )
916 {
917 // misc settings
918 tSettings.m_iMinPrefixLen = Max ( hIndex.GetInt ( "min_prefix_len" ), 0 );
919 tSettings.m_iMinInfixLen = Max ( hIndex.GetInt ( "min_infix_len" ), 0 );
920 tSettings.m_iBoundaryStep = Max ( hIndex.GetInt ( "phrase_boundary_step" ), -1 );
921 tSettings.m_bIndexExactWords = hIndex.GetInt ( "index_exact_words" )!=0;
922 tSettings.m_iOvershortStep = Min ( Max ( hIndex.GetInt ( "overshort_step", 1 ), 0 ), 1 );
923 tSettings.m_iStopwordStep = Min ( Max ( hIndex.GetInt ( "stopword_step", 1 ), 0 ), 1 );
924
925 // prefix/infix fields
926 CSphString sFields;
927
928 sFields = hIndex.GetStr ( "prefix_fields" );
929 sFields.ToLower();
930 sphSplit ( tSettings.m_dPrefixFields, sFields.cstr() );
931
932 sFields = hIndex.GetStr ( "infix_fields" );
933 sFields.ToLower();
934 sphSplit ( tSettings.m_dInfixFields, sFields.cstr() );
935
936 if ( tSettings.m_iMinPrefixLen==0 && tSettings.m_dPrefixFields.GetLength()!=0 )
937 {
938 fprintf ( stdout, "WARNING: min_prefix_len=0, prefix_fields ignored\n" );
939 tSettings.m_dPrefixFields.Reset();
940 }
941
942 if ( tSettings.m_iMinInfixLen==0 && tSettings.m_dInfixFields.GetLength()!=0 )
943 {
944 fprintf ( stdout, "WARNING: min_infix_len=0, infix_fields ignored\n" );
945 tSettings.m_dInfixFields.Reset();
946 }
947
948 // the only way we could have both prefixes and infixes enabled is when specific field subsets are configured
949 if ( tSettings.m_iMinInfixLen>0 && tSettings.m_iMinPrefixLen>0
950 && ( !tSettings.m_dPrefixFields.GetLength() || !tSettings.m_dInfixFields.GetLength() ) )
951 {
952 sError.SetSprintf ( "prefixes and infixes can not both be enabled on all fields" );
953 return false;
954 }
955
956 tSettings.m_dPrefixFields.Uniq();
957 tSettings.m_dInfixFields.Uniq();
958
959 ARRAY_FOREACH ( i, tSettings.m_dPrefixFields )
960 if ( tSettings.m_dInfixFields.Contains ( tSettings.m_dPrefixFields[i] ) )
961 {
962 sError.SetSprintf ( "field '%s' marked both as prefix and infix", tSettings.m_dPrefixFields[i].cstr() );
963 return false;
964 }
965
966 // html stripping
967 if ( hIndex ( "html_strip" ) )
968 {
969 tSettings.m_bHtmlStrip = hIndex.GetInt ( "html_strip" )!=0;
970 tSettings.m_sHtmlIndexAttrs = hIndex.GetStr ( "html_index_attrs" );
971 tSettings.m_sHtmlRemoveElements = hIndex.GetStr ( "html_remove_elements" );
972 }
973
974 // docinfo
975 tSettings.m_eDocinfo = SPH_DOCINFO_EXTERN;
976 if ( hIndex("docinfo") )
977 {
978 if ( hIndex["docinfo"]=="none" ) tSettings.m_eDocinfo = SPH_DOCINFO_NONE;
979 else if ( hIndex["docinfo"]=="inline" ) tSettings.m_eDocinfo = SPH_DOCINFO_INLINE;
980 else if ( hIndex["docinfo"]=="extern" ) tSettings.m_eDocinfo = SPH_DOCINFO_EXTERN;
981 else
982 fprintf ( stdout, "WARNING: unknown docinfo=%s, defaulting to extern\n", hIndex["docinfo"].cstr() );
983 }
984
985 // hit format
986 // TODO! add the description into documentation.
987 tSettings.m_eHitFormat = SPH_HIT_FORMAT_INLINE;
988 if ( hIndex("hit_format") )
989 {
990 if ( hIndex["hit_format"]=="plain" ) tSettings.m_eHitFormat = SPH_HIT_FORMAT_PLAIN;
991 else if ( hIndex["hit_format"]=="inline" ) tSettings.m_eHitFormat = SPH_HIT_FORMAT_INLINE;
992 else
993 fprintf ( stdout, "WARNING: unknown hit_format=%s, defaulting to inline\n", hIndex["hit_format"].cstr() );
994 }
995
996 // hit-less indices
997 if ( hIndex("hitless_words") )
998 {
999 const CSphString & sValue = hIndex["hitless_words"];
1000 if ( sValue=="all" )
1001 {
1002 tSettings.m_eHitless = SPH_HITLESS_ALL;
1003 } else
1004 {
1005 tSettings.m_eHitless = SPH_HITLESS_SOME;
1006 tSettings.m_sHitlessFiles = sValue;
1007 }
1008 }
1009
1010 // sentence and paragraph indexing
1011 tSettings.m_bIndexSP = ( hIndex.GetInt ( "index_sp" )!=0 );
1012 tSettings.m_sZones = hIndex.GetStr ( "index_zones" );
1013
1014 // all good
1015 return true;
1016 }
1017
1018
sphFixupIndexSettings(CSphIndex * pIndex,const CSphConfigSection & hIndex,CSphString & sError)1019 bool sphFixupIndexSettings ( CSphIndex * pIndex, const CSphConfigSection & hIndex, CSphString & sError )
1020 {
1021 bool bTokenizerSpawned = false;
1022
1023 if ( !pIndex->GetTokenizer () )
1024 {
1025 CSphTokenizerSettings tSettings;
1026 if ( !sphConfTokenizer ( hIndex, tSettings, sError ) )
1027 return false;
1028
1029 ISphTokenizer * pTokenizer = ISphTokenizer::Create ( tSettings, sError );
1030 if ( !pTokenizer )
1031 return false;
1032
1033 bTokenizerSpawned = true;
1034 pIndex->SetTokenizer ( pTokenizer );
1035 }
1036
1037 if ( !pIndex->GetDictionary () )
1038 {
1039 CSphDictSettings tSettings;
1040 if ( pIndex->m_bId32to64 )
1041 tSettings.m_bCrc32 = true;
1042 sphConfDictionary ( hIndex, tSettings );
1043 CSphDict * pDict = sphCreateDictionaryCRC ( tSettings, pIndex->GetTokenizer (), sError, pIndex->GetName() );
1044 if ( !pDict )
1045 return false;
1046
1047 pIndex->SetDictionary ( pDict );
1048 }
1049
1050 if ( bTokenizerSpawned )
1051 {
1052 ISphTokenizer * pTokenizer = pIndex->LeakTokenizer ();
1053 ISphTokenizer * pTokenFilter = ISphTokenizer::CreateTokenFilter ( pTokenizer, pIndex->GetDictionary ()->GetMultiWordforms () );
1054 pIndex->SetTokenizer ( pTokenFilter ? pTokenFilter : pTokenizer );
1055 }
1056
1057 if ( !pIndex->IsStripperInited () )
1058 {
1059 CSphIndexSettings tSettings = pIndex->GetSettings ();
1060
1061 if ( hIndex ( "html_strip" ) )
1062 {
1063 tSettings.m_bHtmlStrip = hIndex.GetInt ( "html_strip" )!=0;
1064 tSettings.m_sHtmlIndexAttrs = hIndex.GetStr ( "html_index_attrs" );
1065 tSettings.m_sHtmlRemoveElements = hIndex.GetStr ( "html_remove_elements" );
1066 }
1067 tSettings.m_sZones = hIndex.GetStr ( "index_zones" );
1068
1069 pIndex->Setup ( tSettings );
1070 }
1071
1072 pIndex->PostSetup();
1073
1074 return true;
1075 }
1076
1077 //////////////////////////////////////////////////////////////////////////
1078
sphLoadConfig(const char * sOptConfig,bool bQuiet,CSphConfigParser & cp)1079 const char * sphLoadConfig ( const char * sOptConfig, bool bQuiet, CSphConfigParser & cp )
1080 {
1081 // fallback to defaults if there was no explicit config specified
1082 while ( !sOptConfig )
1083 {
1084 #ifdef SYSCONFDIR
1085 sOptConfig = SYSCONFDIR "/sphinx.conf";
1086 if ( sphIsReadable ( sOptConfig ) )
1087 break;
1088 #endif
1089
1090 sOptConfig = "./sphinx.conf";
1091 if ( sphIsReadable ( sOptConfig ) )
1092 break;
1093
1094 sOptConfig = NULL;
1095 break;
1096 }
1097
1098 if ( !sOptConfig )
1099 sphDie ( "no readable config file (looked in "
1100 #ifdef SYSCONFDIR
1101 SYSCONFDIR "/sphinx.conf, "
1102 #endif
1103 "./sphinx.conf)" );
1104
1105 if ( !bQuiet )
1106 fprintf ( stdout, "using config file '%s'...\n", sOptConfig );
1107
1108 // load config
1109 if ( !cp.Parse ( sOptConfig ) )
1110 sphDie ( "failed to parse config file '%s'", sOptConfig );
1111
1112 CSphConfig & hConf = cp.m_tConf;
1113 if ( !hConf ( "index" ) )
1114 sphDie ( "no indexes found in config file '%s'", sOptConfig );
1115
1116 return sOptConfig;
1117 }
1118
1119 //////////////////////////////////////////////////////////////////////////
1120
1121 static SphLogger_fn g_pLogger = NULL;
1122
Log(ESphLogLevel eLevel,const char * sFmt,va_list ap)1123 inline void Log ( ESphLogLevel eLevel, const char * sFmt, va_list ap )
1124 {
1125 if ( !g_pLogger ) return;
1126 ( *g_pLogger ) ( eLevel, sFmt, ap );
1127 }
1128
sphWarning(const char * sFmt,...)1129 void sphWarning ( const char * sFmt, ... )
1130 {
1131 va_list ap;
1132 va_start ( ap, sFmt );
1133 Log ( SPH_LOG_WARNING, sFmt, ap );
1134 va_end ( ap );
1135 }
1136
1137
sphInfo(const char * sFmt,...)1138 void sphInfo ( const char * sFmt, ... )
1139 {
1140 va_list ap;
1141 va_start ( ap, sFmt );
1142 Log ( SPH_LOG_INFO, sFmt, ap );
1143 va_end ( ap );
1144 }
1145
sphLogFatal(const char * sFmt,...)1146 void sphLogFatal ( const char * sFmt, ... )
1147 {
1148 va_list ap;
1149 va_start ( ap, sFmt );
1150 Log ( SPH_LOG_FATAL, sFmt, ap );
1151 va_end ( ap );
1152 }
1153
sphLogDebug(const char * sFmt,...)1154 void sphLogDebug ( const char * sFmt, ... )
1155 {
1156 va_list ap;
1157 va_start ( ap, sFmt );
1158 Log ( SPH_LOG_DEBUG, sFmt, ap );
1159 va_end ( ap );
1160 }
1161
sphLogDebugv(const char * sFmt,...)1162 void sphLogDebugv ( const char * sFmt, ... )
1163 {
1164 va_list ap;
1165 va_start ( ap, sFmt );
1166 Log ( SPH_LOG_VERBOSE_DEBUG, sFmt, ap );
1167 va_end ( ap );
1168 }
1169
sphLogDebugvv(const char * sFmt,...)1170 void sphLogDebugvv ( const char * sFmt, ... )
1171 {
1172 va_list ap;
1173 va_start ( ap, sFmt );
1174 Log ( SPH_LOG_VERY_VERBOSE_DEBUG, sFmt, ap );
1175 va_end ( ap );
1176 }
1177
sphSetLogger(SphLogger_fn fnLog)1178 void sphSetLogger ( SphLogger_fn fnLog )
1179 {
1180 g_pLogger = fnLog;
1181 }
1182
1183 //////////////////////////////////////////////////////////////////////////
1184 // CRASH REPORTING
1185 //////////////////////////////////////////////////////////////////////////
1186
/// Minimal integer-to-text conversion for the crash reporter.
/// Works on a local scratch buffer only (no allocations, no stdio calls).
/// The text is appended at *ppOutput and *ppOutput is advanced past it;
/// no NUL terminator is written and the destination is NOT bounds-checked.
///
/// @param ppOutput	in/out write cursor; neither it nor *ppOutput may be NULL
/// @param uVal		value to print; a negative value of a signed type is printed as '-' plus its magnitude
/// @param iBase	radix, 2 to 16 (lowercase digits)
/// @param iWidth	minimal field width, left-padded with cFill; 0 means no padding
/// @param iPrec	digit count; shorter numbers get leading zeroes, longer ones keep
///					only their leading iPrec digits; 0 means print as is
/// @param cFill	padding character used with iWidth
///
/// When both iWidth and iPrec are given, iWidth is used as the precision and the width is dropped.
template <typename Uint>
static void UItoA ( char** ppOutput, Uint uVal, int iBase=10, int iWidth=0, int iPrec=0, const char cFill=' ' )
{
	assert ( ppOutput );
	assert ( *ppOutput );
	assert ( iBase>=2 && iBase<=16 ); // cDigits only covers radixes up to 16

	const char cDigits[] = "0123456789abcdef";

	if ( iWidth && iPrec )
	{
		iPrec = iWidth;
		iWidth = 0;
	}

	if ( !uVal )
	{
		if ( !iPrec && !iWidth )
			*(*ppOutput)++ = cDigits[0];
		else
		{
			while ( iPrec-- )
				*(*ppOutput)++ = cDigits[0];
			if ( iWidth )
			{
				while ( --iWidth )
					*(*ppOutput)++ = cFill;
				*(*ppOutput)++ = cDigits[0];
			}
		}
		return;
	}

	// split into sign and magnitude; indexing cDigits with the remainder of a negative value would read out of bounds
	// uVal is known to be non-zero here, so "not positive" means "negative"
	const bool bNegative = !( uVal>Uint(0) );
	uint64_t uNum = (uint64_t) uVal;
	if ( bNegative )
		uNum = 0 - uNum; // unsigned negation yields the magnitude, INT64_MIN included

	const int iMaxIndex = 64; // 64 binary digits is the worst case for a 64-bit value
	char CBuf[iMaxIndex+1];
	char * pRes = &CBuf[iMaxIndex];
	char *& pOutput = *ppOutput;

	// digits are produced from the least significant one, filling the scratch buffer backwards
	while ( uNum )
	{
		*pRes-- = cDigits [ uNum % iBase ];
		uNum /= iBase;
	}

	const int iLen = (int)( iMaxIndex - ( pRes-CBuf ) );

	bool bSignPending = bNegative;
	if ( iWidth )
	{
		// the sign takes one column of the field
		int iPad = iWidth - iLen - ( bNegative ? 1 : 0 );

		// with zero fill, the sign has to precede the padding
		if ( bSignPending && cFill=='0' )
		{
			*pOutput++ = '-';
			bSignPending = false;
		}

		while ( iPad-->0 )
			*pOutput++ = cFill;
	}

	if ( bSignPending )
		*pOutput++ = '-';

	if ( iPrec )
	{
		while ( iLen < iPrec )
		{
			*pOutput++ = cDigits[0];
			iPrec--;
		}
		// from here on, iPrec is the number of trailing digits to drop
		iPrec = iLen-iPrec;
	}

	while ( pRes < CBuf+iMaxIndex-iPrec )
		*pOutput++ = *++pRes;
}
1252
1253
sphVSprintf(char * pOutput,const char * sFmt,va_list ap)1254 static int sphVSprintf ( char * pOutput, const char * sFmt, va_list ap )
1255 {
1256 enum eStates { SNORMAL, SPERCENT, SHAVEFILL, SINWIDTH, SINPREC };
1257 eStates state = SNORMAL;
1258 int iPrec = 0;
1259 int iWidth = 0;
1260 char cFill = ' ';
1261 const char * pBegin = pOutput;
1262 bool bHeadingSpace = true;
1263
1264 char c;
1265 while ( ( c = *sFmt++ )!=0 )
1266 {
1267 // handle percent
1268 if ( c=='%' )
1269 {
1270 if ( state==SNORMAL )
1271 {
1272 state = SPERCENT;
1273 iPrec = 0;
1274 iWidth = 0;
1275 cFill = ' ';
1276 } else
1277 {
1278 state = SNORMAL;
1279 *pOutput++ = c;
1280 }
1281 continue;
1282 }
1283
1284 // handle regular chars
1285 if ( state==SNORMAL )
1286 {
1287 *pOutput++ = c;
1288 continue;
1289 }
1290
1291 // handle modifiers
1292 switch ( c )
1293 {
1294 case '0':
1295 if ( state==SPERCENT )
1296 {
1297 cFill = '0';
1298 state = SHAVEFILL;
1299 break;
1300 }
1301 case '1': case '2': case '3':
1302 case '4': case '5': case '6':
1303 case '7': case '8': case '9':
1304 if ( state==SPERCENT || state==SHAVEFILL )
1305 {
1306 state = SINWIDTH;
1307 iWidth = c - '0';
1308 } else if ( state==SINWIDTH )
1309 iWidth = iWidth * 10 + c - '0';
1310 else if ( state==SINPREC )
1311 iPrec = iPrec * 10 + c - '0';
1312 break;
1313
1314 case '-':
1315 if ( state==SPERCENT )
1316 bHeadingSpace = false;
1317 else
1318 state = SNORMAL; // FIXME? means that bad/unhandled syntax with dash will be just ignored
1319 break;
1320
1321 case '.':
1322 state = SINPREC;
1323 iPrec = 0;
1324 break;
1325
1326 case 's': // string
1327 {
1328 const char * pValue = va_arg ( ap, const char * );
1329 int iValue = strlen ( pValue );
1330
1331 if ( iWidth && bHeadingSpace )
1332 while ( iValue < iWidth-- )
1333 *pOutput++ = ' ';
1334
1335 if ( iPrec && iPrec < iValue )
1336 while ( iPrec-- )
1337 *pOutput++ = *pValue++;
1338 else
1339 while ( *pValue )
1340 *pOutput++ = *pValue++;
1341
1342 if ( iWidth && !bHeadingSpace )
1343 while ( iValue < iWidth-- )
1344 *pOutput++ = ' ';
1345
1346 state = SNORMAL;
1347 break;
1348 }
1349
1350 case 'p': // pointer
1351 {
1352 void * pValue = va_arg ( ap, void * );
1353 uint64_t uValue = uint64_t ( pValue );
1354 UItoA ( &pOutput, uValue, 16, iWidth, iPrec, cFill );
1355 state = SNORMAL;
1356 break;
1357 }
1358
1359 case 'x': // hex integer
1360 case 'd': // decimal integer
1361 {
1362 DWORD uValue = va_arg ( ap, DWORD );
1363 UItoA ( &pOutput, uValue, ( c=='x' ) ? 16 : 10, iWidth, iPrec, cFill );
1364 state = SNORMAL;
1365 break;
1366 }
1367
1368 case 'l': // decimal int64
1369 {
1370 int64_t iValue = va_arg ( ap, int64_t );
1371 UItoA ( &pOutput, iValue, 10, iWidth, iPrec, cFill );
1372 state = SNORMAL;
1373 break;
1374 }
1375
1376 default:
1377 state = SNORMAL;
1378 *pOutput++ = c;
1379 }
1380 }
1381
1382 // final zero to EOL
1383 *pOutput++ = '\n';
1384 return pOutput - pBegin;
1385 }
1386
1387
/// Write exactly iSize bytes from pBuf to the file descriptor iFD.
/// Short writes are continued and writes interrupted by a signal (EINTR) are retried,
/// so a partial transfer is no longer reported as a failure with half the data already out.
/// @return true if all the bytes were written; false on a write error,
///			or if write() keeps reporting zero bytes written
bool sphWrite ( int iFD, const void * pBuf, size_t iSize )
{
	const char * pCur = (const char *) pBuf;
	size_t iLeft = iSize;

	for ( ;; )
	{
		// write() is issued even for iSize==0, so that a bad descriptor still gets reported
		const int64_t iRes = ::write ( iFD, pCur, iLeft );
		if ( iRes<0 )
		{
			if ( errno==EINTR )
				continue; // interrupted before anything was written; just retry
			return false;
		}

		if ( (size_t)iRes>=iLeft )
			return true;

		if ( !iRes )
			return false; // no progress; bail out rather than spin

		pCur += iRes;
		iLeft -= (size_t)iRes;
	}
}
1392
1393
1394 static char g_sSafeInfoBuf [ 1024 ];
1395
sphSafeInfo(int iFD,const char * sFmt,...)1396 void sphSafeInfo ( int iFD, const char * sFmt, ... )
1397 {
1398 if ( iFD<0 || !sFmt )
1399 return;
1400
1401 va_list ap;
1402 va_start ( ap, sFmt );
1403 int iLen = sphVSprintf ( g_sSafeInfoBuf, sFmt, ap ); // FIXME! make this vsnprintf
1404 va_end ( ap );
1405 sphWrite ( iFD, g_sSafeInfoBuf, iLen );
1406 }
1407
1408
1409 #if !USE_WINDOWS
1410
1411 #define SPH_BACKTRACE_ADDR_COUNT 128
1412 static void * g_pBacktraceAddresses [SPH_BACKTRACE_ADDR_COUNT];
1413
sphBacktrace(int iFD,bool bSafe)1414 void sphBacktrace ( int iFD, bool bSafe )
1415 {
1416 if ( iFD<0 )
1417 return;
1418
1419 sphSafeInfo ( iFD, "-------------- backtrace begins here ---------------" );
1420 #ifdef COMPILER
1421 sphSafeInfo ( iFD, "Program compiled with " COMPILER );
1422 #endif
1423
1424 #ifdef OS_UNAME
1425 sphSafeInfo ( iFD, "Host OS is "OS_UNAME );
1426 #endif
1427
1428 bool bOk = true;
1429
1430 void * pMyStack = NULL;
1431 int iStackSize = 0;
1432 if ( !bSafe )
1433 {
1434 pMyStack = sphMyStack();
1435 iStackSize = g_iThreadStackSize;
1436 }
1437 sphSafeInfo ( iFD, "Stack bottom = 0x%p, thread stack size = 0x%x", pMyStack, iStackSize );
1438
1439 while ( pMyStack && !bSafe )
1440 {
1441 sphSafeInfo ( iFD, "begin of manual backtrace:" );
1442 BYTE ** pFramePointer = NULL;
1443
1444 int iFrameCount = 0;
1445 int iReturnFrameCount = sphIsLtLib() ? 2 : 1;
1446
1447 #ifdef __i386__
1448 #define SIGRETURN_FRAME_OFFSET 17
1449 __asm __volatile__ ( "movl %%ebp,%0":"=r"(pFramePointer):"r"(pFramePointer) );
1450 #endif
1451
1452 #ifdef __x86_64__
1453 #define SIGRETURN_FRAME_OFFSET 23
1454 __asm __volatile__ ( "movq %%rbp,%0":"=r"(pFramePointer):"r"(pFramePointer) );
1455 #endif
1456
1457 #ifndef SIGRETURN_FRAME_OFFSET
1458 #define SIGRETURN_FRAME_OFFSET 0
1459 #endif
1460
1461 if ( !pFramePointer )
1462 {
1463 sphSafeInfo ( iFD, "Frame pointer is null, backtrace failed (did you build with -fomit-frame-pointer?)" );
1464 break;
1465 }
1466
1467 if ( !pMyStack || (BYTE*) pMyStack > (BYTE*) &pFramePointer )
1468 {
1469 int iRound = Min ( 65536, iStackSize );
1470 pMyStack = (void *) ( ( (size_t) &pFramePointer + iRound ) & ~(size_t)65535 );
1471 sphSafeInfo ( iFD, "Something wrong with thread stack, backtrace may be incorrect (fp=%p)", pFramePointer );
1472
1473 if ( pFramePointer > (BYTE**) pMyStack || pFramePointer < (BYTE**) pMyStack - iStackSize )
1474 {
1475 sphSafeInfo ( iFD, "Wrong stack limit or frame pointer, backtrace failed (fp=%p, stack=%p, stacksize=%d)", pFramePointer, pMyStack, iStackSize );
1476 break;
1477 }
1478 }
1479
1480 sphSafeInfo ( iFD, "Stack looks OK, attempting backtrace." );
1481
1482 BYTE** pNewFP = NULL;
1483 while ( pFramePointer < (BYTE**) pMyStack )
1484 {
1485 pNewFP = (BYTE**) *pFramePointer;
1486 sphSafeInfo ( iFD, "%p", iFrameCount==iReturnFrameCount? *(pFramePointer + SIGRETURN_FRAME_OFFSET) : *(pFramePointer + 1) );
1487
1488 bOk = pNewFP > pFramePointer;
1489 if ( !bOk ) break;
1490
1491 pFramePointer = pNewFP;
1492 iFrameCount++;
1493 }
1494
1495 if ( !bOk )
1496 sphSafeInfo ( iFD, "Something wrong in frame pointers, backtrace failed (fp=%p)", pNewFP );
1497
1498 break;
1499 }
1500
1501 #if HAVE_BACKTRACE
1502 sphSafeInfo ( iFD, "begin of system backtrace:" );
1503 int iDepth = backtrace ( g_pBacktraceAddresses, SPH_BACKTRACE_ADDR_COUNT );
1504 #if HAVE_BACKTRACE_SYMBOLS
1505 sphSafeInfo ( iFD, "begin of system symbols:" );
1506 backtrace_symbols_fd ( g_pBacktraceAddresses, iDepth, iFD );
1507 #elif !HAVE_BACKTRACE_SYMBOLS
1508 sphSafeInfo ( iFD, "begin of manual symbols:" );
1509 for ( int i=0; i<Depth; i++ )
1510 sphSafeInfo ( iFD, "%p", g_pBacktraceAddresses[i] );
1511 #endif // HAVE_BACKTRACE_SYMBOLS
1512 #endif // !HAVE_BACKTRACE
1513
1514 if ( bOk )
1515 sphSafeInfo ( iFD, "Backtrace looks OK. Now you have to do following steps:\n"
1516 " 1. Run the command over the crashed binary (for example, 'indexer'):\n"
1517 " nm -n indexer > indexer.sym\n"
1518 " 2. Attach the binary, generated .sym and the text of backtrace (see above) to the bug report.\n"
1519 "Also you can read the section about resolving backtraces in the documentation.");
1520 sphSafeInfo ( iFD, "-------------- backtrace ends here ---------------" );
1521 }
1522
1523 #else // USE_WINDOWS
1524
sphBacktrace(EXCEPTION_POINTERS * pExc,const char * sFile)1525 void sphBacktrace ( EXCEPTION_POINTERS * pExc, const char * sFile )
1526 {
1527 if ( !pExc || !sFile || !(*sFile) )
1528 {
1529 sphInfo ( "can't generate minidump" );
1530 return;
1531 }
1532
1533 HANDLE hFile = CreateFile ( sFile, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0 );
1534 if ( hFile==INVALID_HANDLE_VALUE )
1535 {
1536 sphInfo ( "can't create minidump file '%s'", sFile );
1537 return;
1538 }
1539
1540 MINIDUMP_EXCEPTION_INFORMATION tExcInfo;
1541 tExcInfo.ExceptionPointers = pExc;
1542 tExcInfo.ClientPointers = FALSE;
1543 tExcInfo.ThreadId = GetCurrentThreadId();
1544
1545 bool bDumped = ( MiniDumpWriteDump ( GetCurrentProcess(), GetCurrentProcessId(), hFile, MiniDumpNormal, &tExcInfo, 0, 0 )==TRUE );
1546 CloseHandle ( hFile );
1547
1548 if ( !bDumped )
1549 sphInfo ( "can't dump minidump" );
1550 }
1551
1552 #endif // USE_WINDOWS
1553
1554
/// Whether sphUnlinkIndex() removes the files when it is called without bForce; on by default
static bool g_bUnlinkOld = true;

/// Change the global unlink flag
void sphSetUnlinkOld ( bool bUnlink )
{
	g_bUnlinkOld = bUnlink;
}
1560
1561
sphUnlinkIndex(const char * sName,bool bForce)1562 void sphUnlinkIndex ( const char * sName, bool bForce )
1563 {
1564 if ( !( g_bUnlinkOld || bForce ) )
1565 return;
1566
1567 // FIXME! ext list must be in sync with sphinx.cpp, searchd.cpp
1568 const int EXT_COUNT = 9;
1569 const char * dCurExts[EXT_COUNT] = { ".sph", ".spa", ".spi", ".spd", ".spp", ".spm", ".spk", ".sps", ".mvp" };
1570 char sFileName[SPH_MAX_FILENAME_LEN];
1571
1572 for ( int j=0; j<EXT_COUNT; j++ )
1573 {
1574 snprintf ( sFileName, sizeof(sFileName), "%s%s", sName, dCurExts[j] );
1575 // 'mvp' is optional file
1576 if ( ::unlink ( sFileName ) && errno!=ENOENT )
1577 sphWarning ( "unlink failed (file '%s', error '%s'", sFileName, strerror(errno) );
1578 }
1579 }
1580
1581
1582 //
1583 // $Id: sphinxutils.cpp 4113 2013-08-26 07:43:28Z deogar $
1584 //
1585