1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; either version 2 of the License, or
6    (at your option) any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 
18 #include "udm_config.h"
19 
20 #ifdef HAVE_SQL
21 
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <fcntl.h>
29 #include <errno.h>
30 #include <ctype.h>
31 #include <time.h>
32 
33 #ifdef WIN32
34 #include <time.h>
35 #endif
36 
37 #ifdef HAVE_UNISTD_H
38 #include <unistd.h>
39 #endif
40 
41 #ifdef HAVE_SYS_TIME_H
42 #include <sys/time.h>
43 #endif
44 
45 #include "udm_common.h"
46 #include "udm_utils.h"
47 #include "udm_spell.h"
48 #include "udm_robots.h"
49 #include "udm_db.h"
50 #include "udm_unicode.h"
51 #include "udm_unidata.h"
52 #include "udm_url.h"
53 #include "udm_log.h"
54 #include "udm_proto.h"
55 #include "udm_conf.h"
56 #include "udm_hash.h"
57 #include "udm_xmalloc.h"
58 #include "udm_boolean.h"
59 #include "udm_coords.h"
60 #include "udm_searchtool.h"
61 #include "udm_server.h"
62 #include "udm_stopwords.h"
63 #include "udm_doc.h"
64 #include "udm_result.h"
65 #include "udm_vars.h"
66 #include "udm_agent.h"
67 #include "udm_store.h"
68 #include "udm_hrefs.h"
69 #include "udm_word.h"
70 #include "udm_db_int.h"
71 #include "udm_match.h"
72 #include "udm_indexer.h"
73 #include "udm_textlist.h"
74 #include "udm_parsehtml.h"
75 #include "udm_wild.h"
76 #include "udm_http.h"
77 #include "udm_contentencoding.h"
78 #include "udm_indexcache.h"
79 #include "udm_query.h"
80 #include "udm_env.h"
81 
82 
83 static inline UDM_SQL *
UdmSQL(UDM_DB * db)84 UdmSQL(UDM_DB *db)
85 {
86   return &((UDM_SQLDB*) db->specific)->sql;
87 }
88 
89 UDM_SQLDB *
UdmSQLDB(UDM_DB * db)90 UdmSQLDB(UDM_DB *db)
91 {
92   return (UDM_SQLDB*) db->specific;
93 }
94 
95 udm_sqldbtype_t
UdmSQLDBType(UDM_DB * db)96 UdmSQLDBType(UDM_DB *db)
97 {
98   return UdmSQL(db)->DBType;
99 }
100 
101 udm_sqldbapi_t
UdmSQLDBDriver(UDM_DB * db)102 UdmSQLDBDriver(UDM_DB *db)
103 {
104   return UdmSQL(db)->DBDriver;
105 }
106 
107 udm_sqldbmode_t
UdmSQLDBMode(UDM_DB * db)108 UdmSQLDBMode(UDM_DB *db)
109 {
110   return UdmSQLDB(db)->DBMode;
111 }
112 
113 int
UdmSQLDBVersion(UDM_DB * db)114 UdmSQLDBVersion(UDM_DB *db)
115 {
116   return UdmSQL(db)->version;
117 }
118 
119 int
UdmSQLDBConnected(UDM_DB * db)120 UdmSQLDBConnected(UDM_DB *db)
121 {
122   return UdmSQL(db)->connected;
123 }
124 
125 UDM_SQLDB_HANDLER *
UdmSQLDBHandler(UDM_DB * db)126 UdmSQLDBHandler(UDM_DB *db)
127 {
128   return &(UdmSQL(db)->handler);
129 }
130 
131 static const UDM_DBMODE_HANDLER *
UdmSQLDBModeHandler(UDM_DB * db)132 UdmSQLDBModeHandler(UDM_DB *db)
133 {
134   return UdmSQLDB(db)->dbmode_handler;
135 }
136 
137 static int
UdmSQLDBHaveIn(UDM_DB * db)138 UdmSQLDBHaveIn(UDM_DB *db)
139 {
140   return UdmSQL(db)->DBSQL_IN;
141 }
142 
143 int
UdmSQLDBFlags(UDM_DB * db)144 UdmSQLDBFlags(UDM_DB *db)
145 {
146   return UdmSQL(db)->flags;
147 }
148 
149 const char *
UdmSQLDBQueryFrom(UDM_QUERY * query)150 UdmSQLDBQueryFrom(UDM_QUERY *query)
151 {
152   return query->from;
153 }
154 
155 UDM_VARLIST *
UdmSQLDBVars(UDM_DB * db)156 UdmSQLDBVars(UDM_DB *db)
157 {
158   return &UdmSQL(db)->Vars;
159 }
160 
161 
162 char *
UdmDBSQLError(UDM_DB * db)163 UdmDBSQLError(UDM_DB *db)
164 {
165   return UdmSQL(db)->errstr;
166 }
167 
168 
169 size_t
UdmDBSQLErrorSize(UDM_DB * db)170 UdmDBSQLErrorSize(UDM_DB *db)
171 {
172   return sizeof(UdmSQL(db)->errstr);
173 }
174 
175 
176 const char *
UdmDBSQLParamPlaceHolder(UDM_DB * db,size_t i)177 UdmDBSQLParamPlaceHolder(UDM_DB *db, size_t i)
178 {
179   return UdmSQLParamPlaceHolder(UdmSQL(db), i);
180 }
181 
182 
183 static inline udm_rc_t
UdmDBSQLTrace(UDM_AGENT * A,UDM_DB * db,udm_rc_t rc,udm_timer_t start,const char * func)184 UdmDBSQLTrace(UDM_AGENT *A, UDM_DB *db, udm_rc_t rc, udm_timer_t start,
185               const char *func)
186 {
187   if (UdmSQL(db)->flags & UDM_SQL_DEBUG_QUERY)
188   {
189     fprintf(stderr, "%.2f %s\n", UdmStopTimer(&start), func);
190   }
191   return rc;
192 }
193 
194 
195 static inline udm_rc_t
UdmDBSQLTrace1(UDM_AGENT * A,UDM_DB * db,udm_rc_t rc,udm_timer_t start,const char * func,const char * param)196 UdmDBSQLTrace1(UDM_AGENT *A, UDM_DB *db, udm_rc_t rc, udm_timer_t start,
197                const char *func, const char *param)
198 {
199   if (UdmSQL(db)->flags & UDM_SQL_DEBUG_QUERY)
200   {
201     fprintf(stderr, "%.2f %s %s\n", UdmStopTimer(&start), func, param);
202   }
203   return rc;
204 }
205 
206 
207 static inline udm_rc_t
UdmDBSQLTrace2(UDM_AGENT * A,UDM_DB * db,udm_rc_t rc,udm_timer_t start,const char * func,const char * from,const char * to)208 UdmDBSQLTrace2(UDM_AGENT *A, UDM_DB *db, udm_rc_t rc, udm_timer_t start,
209                const char *func, const char *from, const char *to)
210 {
211   if (UdmSQL(db)->flags & UDM_SQL_DEBUG_QUERY)
212   {
213     fprintf(stderr, "%.2f %s %s %s\n", UdmStopTimer(&start), func, from, to);
214   }
215   return rc;
216 }
217 
218 
219 udm_rc_t
UdmDBSQLQuery(UDM_AGENT * A,UDM_DB * db,UDM_SQLRES * Res,const char * buf)220 UdmDBSQLQuery(UDM_AGENT *A, UDM_DB *db, UDM_SQLRES *Res, const char *buf)
221 {
222   udm_timer_t ticks= UdmStartTimer();
223   udm_rc_t rc= UdmSQLQuery(UdmSQL(db), Res, buf);
224   return UdmDBSQLTrace1(A, db, rc, ticks, "SQL", buf);
225 }
226 
227 
228 udm_rc_t
UdmDBSQLDropTableIfExists(UDM_AGENT * A,UDM_DB * db,const char * name)229 UdmDBSQLDropTableIfExists(UDM_AGENT *A, UDM_DB *db, const char *name)
230 {
231   udm_timer_t ticks= UdmStartTimer();
232   udm_rc_t rc= UdmSQLDropTableIfExists(UdmSQL(db), name);
233   return UdmDBSQLTrace1(A, db, rc, ticks, "DropTableIfExists", name);
234 }
235 
236 
237 udm_rc_t
UdmDBSQLTableTruncateOrDelete(UDM_AGENT * A,UDM_DB * db,const char * name)238 UdmDBSQLTableTruncateOrDelete(UDM_AGENT *A, UDM_DB *db, const char *name)
239 {
240   udm_timer_t ticks= UdmStartTimer();
241   udm_rc_t rc= UdmSQLTableTruncateOrDelete(UdmSQL(db), name);
242   return UdmDBSQLTrace1(A, db, rc, ticks, "TableTruncateOrDelete", name);
243 }
244 
245 
246 udm_rc_t
UdmDBSQLCopyStructure(UDM_AGENT * A,UDM_DB * db,const char * from,const char * to)247 UdmDBSQLCopyStructure(UDM_AGENT *A, UDM_DB *db, const char *from, const char *to)
248 {
249   udm_timer_t ticks= UdmStartTimer();
250   udm_rc_t rc= UdmSQLCopyStructure(UdmSQL(db), from, to);
251   return UdmDBSQLTrace2(A, db, rc, ticks, "CopyStructure", from, to);
252 }
253 
254 
255 udm_rc_t
UdmDBSQLRenameTable(UDM_AGENT * A,UDM_DB * db,const char * from,const char * to)256 UdmDBSQLRenameTable(UDM_AGENT *A, UDM_DB *db, const char *from, const char *to)
257 {
258   udm_timer_t ticks= UdmStartTimer();
259   udm_rc_t rc= UdmSQLRenameTable(UdmSQL(db), from, to);
260   return UdmDBSQLTrace2(A, db, rc, ticks, "RenameTable", from, to);
261 }
262 
263 
264 udm_rc_t
UdmDBSQLBegin(UDM_AGENT * A,UDM_DB * db)265 UdmDBSQLBegin(UDM_AGENT *A, UDM_DB *db)
266 {
267   udm_timer_t ticks= UdmStartTimer();
268   udm_rc_t rc= UdmSQLBegin(UdmSQL(db));
269   return UdmDBSQLTrace(A, db, rc, ticks, "Begin");
270 }
271 
272 
273 udm_rc_t
UdmDBSQLCommit(UDM_AGENT * A,UDM_DB * db)274 UdmDBSQLCommit(UDM_AGENT *A, UDM_DB *db)
275 {
276   udm_timer_t ticks= UdmStartTimer();
277   udm_rc_t rc= UdmSQLCommit(UdmSQL(db));
278   return UdmDBSQLTrace(A, db, rc, ticks, "Commit");
279 }
280 
281 
282 udm_rc_t
UdmDBSQLExecDirect(UDM_AGENT * A,UDM_DB * db,UDM_SQLRES * R,const char * query)283 UdmDBSQLExecDirect(UDM_AGENT *A, UDM_DB *db, UDM_SQLRES *R, const char *query)
284 {
285   udm_timer_t ticks= UdmStartTimer();
286   udm_rc_t rc= UdmSQLExecDirect(UdmSQL(db), R, query);
287   return UdmDBSQLTrace1(A, db, rc, ticks, "ExecDirect", query);
288 }
289 
290 
291 udm_rc_t
UdmDBSQLPrepare(UDM_AGENT * A,UDM_DB * db,const char * query)292 UdmDBSQLPrepare(UDM_AGENT *A, UDM_DB *db, const char *query)
293 {
294   udm_timer_t ticks= UdmStartTimer();
295   udm_rc_t rc= UdmSQLPrepare(UdmSQL(db), query);
296   return UdmDBSQLTrace1(A, db, rc, ticks, "Prepare", query);
297 }
298 
299 
300 udm_rc_t
UdmDBSQLExecute(UDM_AGENT * A,UDM_DB * db)301 UdmDBSQLExecute(UDM_AGENT *A, UDM_DB *db)
302 {
303   udm_timer_t ticks= UdmStartTimer();
304   udm_rc_t rc= UdmSQLExecute(UdmSQL(db));
305   return UdmDBSQLTrace(A, db, rc, ticks, "Execute");
306 }
307 
308 
309 udm_rc_t
UdmDBSQLFetchRow(UDM_AGENT * A,UDM_DB * db,UDM_SQLRES * R,UDM_STR * p)310 UdmDBSQLFetchRow(UDM_AGENT *A, UDM_DB *db, UDM_SQLRES *R, UDM_STR *p)
311 {
312   return UdmSQLDBHandler(db)->FetchRow(UdmSQL(db), R, p);
313 }
314 
315 
316 udm_rc_t
UdmDBSQLStmtFree(UDM_AGENT * A,UDM_DB * db)317 UdmDBSQLStmtFree(UDM_AGENT *A, UDM_DB *db)
318 {
319   udm_timer_t ticks= UdmStartTimer();
320   udm_rc_t rc= UdmSQLStmtFree(UdmSQL(db));
321   return UdmDBSQLTrace(A, db, rc, ticks, "StmtFree");
322 }
323 
324 
/*
  Print a bound parameter value to "file" as a single-quoted string.

  Bytes in the printable ASCII range 0x20..0x7E are written as is,
  every other byte is written as \xHH (two upper-case hex digits).

  file - destination stream; the quotes are written here too
         (they used to go to stderr regardless of "file").
  data - pointer to the raw parameter bytes, may be NULL.
  size - number of bytes in "data"; values <= 0 print an empty
         quoted string.
*/
static void
UdmSQLPrintParameter(FILE *file, const void *data, int size)
{
  fputc('\'', file);
  if (data != NULL && size > 0)
  {
    const unsigned char *s= (const unsigned char*) data;
    const unsigned char *e= s + size;
    for ( ; s < e; s++)
    {
      if (*s >= 0x20 && *s <= 0x7E)
        fputc((int) *s, file);
      else
        fprintf(file, "\\x%02X", (int) *s);
    }
  }
  fputc('\'', file);
}
340 
341 
342 udm_rc_t
UdmDBSQLBindParameter(UDM_AGENT * A,UDM_DB * db,int pos,const void * data,int size,udm_sqltype_t type)343 UdmDBSQLBindParameter(UDM_AGENT *A, UDM_DB *db,
344                       int pos, const void *data, int size, udm_sqltype_t type)
345 {
346   udm_timer_t ticks= UdmStartTimer();
347   udm_rc_t rc= UdmSQLBindParameter(UdmSQL(db), pos, data, size, type);
348   UDM_ASSERT(size == 4 || type != UDM_SQLTYPE_INT32);
349   if (UdmSQL(db)->flags & UDM_SQL_DEBUG_QUERY)
350   {
351     fprintf(stderr, "%.2f BindParameter[%d] %s(%d)",
352             UdmStopTimer(&ticks), pos, UdmSQLTypeToStr(type), size);
353     UdmSQLPrintParameter(stderr, data, size);
354     fprintf(stderr, "\n");
355   }
356   return rc;
357 }
358 
359 
360 udm_rc_t
UdmDBSQLLockOrBegin(UDM_AGENT * A,UDM_DB * db,const char * param)361 UdmDBSQLLockOrBegin(UDM_AGENT *A, UDM_DB *db, const char *param)
362 {
363   udm_timer_t ticks= UdmStartTimer();
364   udm_rc_t rc= UdmSQLLockOrBegin(UdmSQL(db), param);
365   return UdmDBSQLTrace1(A, db, rc, ticks, "LockOrBegin", param);
366 }
367 
368 
369 udm_rc_t
UdmDBSQLUnlockOrCommit(UDM_AGENT * A,UDM_DB * db)370 UdmDBSQLUnlockOrCommit(UDM_AGENT *A, UDM_DB *db)
371 {
372   udm_timer_t ticks= UdmStartTimer();
373   udm_rc_t rc= UdmSQLUnlockOrCommit(UdmSQL(db));
374   return UdmDBSQLTrace(A, db, rc, ticks, "UnlockOrCommit");
375 }
376 
377 
378 udm_rc_t
UdmDBSQLQueryOneRowInt(UDM_AGENT * A,UDM_DB * db,int * res,const char * qbuf)379 UdmDBSQLQueryOneRowInt(UDM_AGENT *A, UDM_DB *db, int *res, const char *qbuf)
380 {
381   udm_timer_t ticks= UdmStartTimer();
382   udm_rc_t rc= UdmSQLQueryOneRowInt(UdmSQL(db), res, qbuf);
383   return UdmDBSQLTrace1(A, db, rc, ticks, "QueryOneRowInt", qbuf);
384 }
385 
386 
387 char *
UdmDBSQLEscStrAlloc(UDM_AGENT * A,UDM_DB * db,const char * src,size_t srclen)388 UdmDBSQLEscStrAlloc(UDM_AGENT *A, UDM_DB *db, const char *src, size_t srclen)
389 {
390   return UdmSQLEscStrAlloc(UdmSQL(db), src, srclen);
391 }
392 
393 
394 char *
UdmDBSQLEscStrSimple(UDM_AGENT * A,UDM_DB * db,char * to,const char * from,size_t l)395 UdmDBSQLEscStrSimple(UDM_AGENT *A, UDM_DB *db, char *to, const char *from, size_t l)
396 {
397   return UdmSQLEscStrSimple(UdmSQL(db), to, from, l);
398 }
399 
400 
401 size_t
UdmDBSQLBinEscStr(UDM_AGENT * A,UDM_DB * db,char * dst,size_t dstlen,const char * src,size_t srclen)402 UdmDBSQLBinEscStr(UDM_AGENT *A, UDM_DB *db, char *dst, size_t dstlen, const char *src, size_t srclen)
403 {
404   return UdmSQLBinEscStr(UdmSQL(db), dst, dstlen, src, srclen);
405 }
406 
407 size_t
UdmDBSQLEscStr(UDM_AGENT * A,UDM_DB * db,char * to,const char * from,size_t l)408 UdmDBSQLEscStr(UDM_AGENT *A, UDM_DB *db, char *to, const char *from, size_t l)
409 {
410   return UdmSQLEscStr(UdmSQL(db), to, from, l);
411 }
412 
413 void
UdmDBSQLTopClause(UDM_AGENT * A,UDM_DB * db,size_t top_num,UDM_SQL_TOP_CLAUSE * Top)414 UdmDBSQLTopClause(UDM_AGENT *A, UDM_DB *db, size_t top_num, UDM_SQL_TOP_CLAUSE *Top)
415 {
416   UdmSQLTopClause(UdmSQL(db), top_num, Top);
417 }
418 
419 static const char*
UdmSQLDBTypeToStr(udm_sqldbtype_t dbtype)420 UdmSQLDBTypeToStr(udm_sqldbtype_t dbtype)
421 {
422   switch(dbtype)
423   {
424     case UDM_DB_MYSQL:   return "mysql";
425     case UDM_DB_PGSQL:   return "pgsql";
426     case UDM_DB_IBASE:   return "ibase";
427     case UDM_DB_MSSQL:   return "mssql";
428     case UDM_DB_ORACLE8: return "oracle";
429     case UDM_DB_SQLITE:  return "sqlite";
430     case UDM_DB_SQLITE3: return "sqlite";
431     case UDM_DB_MIMER:   return "mimer";
432     case UDM_DB_VIRT:    return "virtuoso";
433     case UDM_DB_ACCESS:  return "access";
434     case UDM_DB_DB2:     return "db2";
435     case UDM_DB_CACHE:   return "cache";
436     case UDM_DB_SYBASE:  return "sybase";
437     case UDM_DB_MONETDB: return "monetdb";
438     case UDM_DB_SOLID:   return "solid";
439     case UDM_DB_SAPDB:   return "sapdb";
440   }
441   return "unknown_dbtype";
442 }
443 
444 
445 static const char*
UdmSQLDBModeToStr(udm_sqldbmode_t dbmode)446 UdmSQLDBModeToStr(udm_sqldbmode_t dbmode)
447 {
448   switch (dbmode)
449   {
450     case UDM_SQLDBMODE_SINGLE:  return "single";
451     case UDM_SQLDBMODE_MULTI:   return "multi";
452     case UDM_SQLDBMODE_BLOB:    return "blob";
453     case UDM_SQLDBMODE_RAWBLOB: return "rawblob";
454   }
455   return "unknown_dbmode";
456 }
457 
458 
459 static udm_sqldbmode_t
UdmStr2DBMode(const char * str1,udm_bool_t * error)460 UdmStr2DBMode(const char * str1, udm_bool_t *error)
461 {
462   *error= UDM_FALSE;
463   if (!strncasecmp(str1,"single",6)) return UDM_SQLDBMODE_SINGLE;
464   if (!strncasecmp(str1,"multi",5))  return UDM_SQLDBMODE_MULTI;
465   if (!strncasecmp(str1,"blob",4))   return UDM_SQLDBMODE_BLOB;
466   if (!strncasecmp(str1,"rawblob",7))return UDM_SQLDBMODE_RAWBLOB;
467   *error= UDM_TRUE;
468   return UDM_SQLDBMODE_BLOB;
469 }
470 
471 
472 /************** some forward declarations ********************/
473 static udm_rc_t UdmDeleteURL(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db);
474 static udm_rc_t UdmDeleteBadHrefs(UDM_AGENT *Indexer,
475                                   UDM_DOCUMENT *Doc,
476                                   UDM_DB *db,
477                                   urlid_t url_id);
478 static udm_rc_t UdmDeleteWordFromURL(UDM_AGENT *A, UDM_DOCUMENT *D, UDM_DB *db);
479 
480 
481 /*********************** helper functions **********************/
482 
/*
  Append the condition "add" to the NUL-terminated buffer "where".

  Nothing is done for an empty "add". Otherwise "add" is appended,
  preceded by " AND " when "where" already holds text.
  The caller must provide enough room in "where"; no bound is checked.

  Returns the number of characters appended (0 when "add" is empty).
*/
static size_t
WhereConditionAddAnd(char *where, const char *add)
{
  char *tail;

  if (add[0] == '\0')
    return 0;

  tail= where + strlen(where);
  if (where[0] != '\0')
    return sprintf(tail, "%s%s", " AND ", add);
  return sprintf(tail, "%s%s", "", add);
}
493 
494 
495 static void
WhereConditionDSTRAddAnd(char * where,UDM_DSTR * add)496 WhereConditionDSTRAddAnd(char *where, UDM_DSTR *add)
497 {
498   if (UdmDSTRLength(add))
499   {
500     if (where[0])
501       strcat(where, " AND ");
502     strcat(where, UdmDSTRPtr(add));
503   }
504 }
505 
506 
507 /* Prepare to add a new condition into a class */
508 static udm_rc_t
UdmSQLWhereAddJoiner(UDM_DSTR * dstr,const char * joiner)509 UdmSQLWhereAddJoiner(UDM_DSTR *dstr, const char *joiner)
510 {
511   if (UdmDSTRLength(dstr))  /* Second (or more) condition of the same class */
512   {
513     UdmDSTRShrinkLast(dstr);
514     UdmDSTRAppendSTR(dstr, joiner);
515   }
516   else                 /* First condition of this class */
517   {
518     UdmDSTRAppend(dstr, "(", 1);
519   }
520   return UDM_OK;
521 }
522 
523 
524 static udm_rc_t
UdmSQLWhereIntParam(UDM_DB * db,UDM_DSTR * dstr,const char * sqlname,const char * val)525 UdmSQLWhereIntParam(UDM_DB *db,
526                     UDM_DSTR *dstr, const char *sqlname, const char *val)
527 {
528   const char *range= strchr(val, '-');
529   UdmDSTRRealloc(dstr, UdmDSTRLength(dstr) + strlen(val) + 50);
530   if (UdmSQL(db)->DBSQL_IN && !range) /* Single value */
531   {
532     if (!UdmDSTRLength(dstr)) /* First parameter */
533     {
534       UdmDSTRAppendf(dstr, " %s IN (%d)", sqlname, atoi(val));
535     }
536     else /* Second or higher parameter */
537     {
538       UdmDSTRShrinkLast(dstr);
539       UdmDSTRAppendf(dstr, ",%d)", atoi(val));
540     }
541   }
542   else /* Range */
543   {
544     int first, second;
545     UdmSQLWhereAddJoiner(dstr, " OR ");
546     if (range && 2 == sscanf(val, "%d-%d", &first, &second))
547     {
548       UdmDSTRAppendf(dstr,
549                      "%s>=%d AND %s<=%d)",
550                      sqlname, first, sqlname, second);
551     }
552     else
553     {
554       UdmDSTRAppendf(dstr, "%s=%d)", sqlname, atoi(val));
555     }
556   }
557   return UDM_OK;
558 }
559 
560 
/* Bit flags accepted by UdmSQLWhereStrParam() */
#define UDM_ADD_PARAM_NEG                (1 << 0)  /* Negate: NOT LIKE, joined with AND */
#define UDM_ADD_PARAM_WITH_TAIL_PERCENT  (1 << 1)  /* Append a trailing % to the pattern  */
#define UDM_ADD_PARAM_CHECK_URL_SCHEMA   (1 << 2)  /* Prepend % when no URL schema given  */

/* Combinations used for URL limits */
#define UDM_ADD_PARAM_URL     (UDM_ADD_PARAM_CHECK_URL_SCHEMA|UDM_ADD_PARAM_WITH_TAIL_PERCENT)
#define UDM_ADD_PARAM_URL_NEG (UDM_ADD_PARAM_NEG|UDM_ADD_PARAM_URL)
567 
568 
569 static udm_rc_t
UdmSQLWhereStrParam(UDM_DB * db,UDM_DSTR * dstr,const char * sqlname,const char * val,int flag)570 UdmSQLWhereStrParam(UDM_DB *db, UDM_DSTR *dstr,
571                     const char *sqlname, const char *val, int flag)
572 {
573   const char *first= "";
574   const char *last= (flag & UDM_ADD_PARAM_WITH_TAIL_PERCENT) ? "%" : "";
575   const char *joiner= (flag & UDM_ADD_PARAM_NEG) ? " AND " : " OR ";
576   const char *notx= (flag & UDM_ADD_PARAM_NEG) ? "NOT " : "";
577 
578   if (flag & UDM_ADD_PARAM_CHECK_URL_SCHEMA)
579   {
580     UDM_URL URL;
581     UdmURLInit(&URL);
582     UdmURLParse(&URL,val);
583     /* Check if URL prefix is not given / given */
584     first= (URL.schema == NULL) ? "%" : "";
585     UdmURLFree(&URL);
586   }
587 
588   UdmSQLWhereAddJoiner(dstr, joiner);
589   UdmDSTRAppendf(dstr, "%s %sLIKE '%s%s%s')", sqlname, notx, first, val, last);
590   return UDM_OK;
591 }
592 
593 
/*
  Date limit collected from the query variables
  dt, dx, dm, dy, dd, dp, db, de and dstmp.
*/
typedef struct udm_date_param_st
{
  int dt;      /* Limit type: one of the UDM_DT_xxx values           */
  int dx;      /* 1 selects ">=", any other value "<=" (UDM_DT_ER)   */
  int dm;      /* Month, 0=Jan ... 11=Dec (UDM_DT_ER)                */
  int dy;      /* Full year, e.g. 1970 (UDM_DT_ER)                   */
  int dd;      /* Day of month (UDM_DT_ER)                           */
  time_t dp;   /* Offset subtracted from "now" (UDM_DT_BACK)         */
  int DB;      /* Range begin, as a timestamp (UDM_DT_RANGE)         */
  int DE;      /* Range end, as a timestamp (UDM_DT_RANGE)           */
  int dstmp;   /* If non-zero, replaces dd/dm/dy (UDM_DT_ER)         */
} UDM_DATE_PARAM;
606 
607 
608 static void
UdmDateParamInit(UDM_DATE_PARAM * d)609 UdmDateParamInit(UDM_DATE_PARAM *d)
610 {
611   d->dt= UDM_DT_UNKNOWN;
612   d->dx= 1;
613   d->dm= 0;
614   d->dy= 1970;
615   d->dd= 1;
616   d->dp= (time_t) 0;
617   d->DB= 0;
618   d->DE= time(NULL);
619   d->dstmp= 0;
620 }
621 
622 
623 static udm_rc_t
UdmCheckDateParam(UDM_DATE_PARAM * d,const char * var,const char * val)624 UdmCheckDateParam(UDM_DATE_PARAM *d,
625                   const char *var, const char *val)
626 {
627   int intval= atoi(val);
628   int longval= atol(val);
629 
630   if (!strcmp(var, "dt"))
631   {
632     if(!strcasecmp(val, "back")) d->dt= UDM_DT_BACK;
633     else if (!strcasecmp(val, "er")) d->dt= UDM_DT_ER;
634     else if (!strcasecmp(val, "range")) d->dt= UDM_DT_RANGE;
635   }
636   else if (!strcmp(var, "dx"))
637   {
638     if (intval == 1 || intval == -1) d->dx= intval;
639     else d->dx= 1;
640   }
641   else if (!strcmp(var, "dm"))
642   {
643     d->dm= intval; /* 0=Jan, 1=Feb,..., 11=Dec.*/
644   }
645   else if (!strcmp(var, "dy"))
646   {
647     d->dy= (intval) ? intval : 1970;
648   }
649   else if (!strcmp(var, "dd"))
650   {
651     d->dd= (intval) ? intval : 1;
652   }
653   else if (!strcmp(var, "dstmp"))
654   {
655     d->dstmp= longval ? longval : 0;
656   }
657   else if (!strcmp(var, "dp"))
658   {
659     d->dp= Udm_dp2time_t(val);
660   }
661   else if (!strcmp(var, "db"))
662   {
663     struct tm tm;
664     bzero((void*) &tm, sizeof(tm));
665     sscanf(val, "%d/%d/%d", &tm.tm_mday, &tm.tm_mon, &tm.tm_year);
666     tm.tm_year -= 1900; tm.tm_mon--;
667     d->DB= mktime(&tm);
668   }
669   else if (!strcmp(var, "de"))
670   {
671     struct tm tm;
672     bzero((void*) &tm, sizeof(tm));
673     sscanf(val, "%d/%d/%d", &tm.tm_mday, &tm.tm_mon, &tm.tm_year);
674     tm.tm_year -= 1900; tm.tm_mon--;
675     d->DE= mktime(&tm) + 86400; /* Including the given date */
676   }
677   else
678     return UDM_ERROR;
679   return UDM_OK;
680 }
681 
682 
683 static void
UdmSQLWhereDateParam(UDM_DSTR * cond,UDM_DATE_PARAM * d)684 UdmSQLWhereDateParam(UDM_DSTR *cond, UDM_DATE_PARAM *d)
685 {
686   switch(d->dt)
687   {
688     case UDM_DT_BACK:
689       if (d->dp)
690         UdmDSTRAppendf(cond, "url.last_mod_time >= %li",
691                        (long int) time(NULL) - d->dp);
692       break;
693     case UDM_DT_ER:
694       {
695       struct tm tm;
696         bzero((void*) &tm, sizeof(tm));
697         tm.tm_mday= d->dd;
698         tm.tm_mon= d->dm;
699         tm.tm_year= d->dy - 1900;
700         UdmDSTRAppendf(cond, "url.last_mod_time %s %li",
701                        (d->dx == 1) ? ">=" : "<=",
702                        (long int) (d->dstmp ? d->dstmp : mktime(&tm)));
703       }
704       break;
705     case UDM_DT_RANGE:
706       UdmDSTRAppendf(cond,
707                      "url.last_mod_time >= %li AND url.last_mod_time <= %li",
708                      (long int) d->DB, (long int) d->DE);
709       break;
710     case UDM_DT_UNKNOWN:
711     default:
712       break;
713   }
714 }
715 
/* One entry of the search_params table */
typedef struct udm_search_param_st
{
  const char *name;  /* Name pattern, matched with UdmWildCaseCmp() */
} UDM_SEARCH_PARAM;
720 
721 
722 static const UDM_SEARCH_PARAM search_params[]=
723 {
724   {"ul"},
725   {"ue"},
726   {"u"},
727   {"tag"},
728   {"t"},
729   {"lang"},
730   {"g"},
731   {"type"},
732   {"typ"},
733   {"sl.*"},
734   {NULL}
735 };
736 
737 
738 static const UDM_SEARCH_PARAM*
UdmFindStringParam(const char * name)739 UdmFindStringParam(const char *name)
740 {
741   const UDM_SEARCH_PARAM *param;
742   for (param= search_params; param->name; param++)
743   {
744     if (!UdmWildCaseCmp(name, param->name))
745       return param;
746   }
747   return NULL;
748 }
749 
750 
751 typedef struct
752 {
753   UDM_DSTR from;
754   UDM_DSTR lang;
755   UDM_DSTR seed;
756   UDM_DSTR server;
757   UDM_DSTR site;
758   UDM_DSTR status;
759   UDM_DSTR tag;
760   UDM_DSTR timecond;
761   UDM_DSTR type;
762   UDM_DSTR ue;
763   UDM_DSTR url;
764   UDM_DSTR urlinfo;
765   UDM_DATE_PARAM datep;
766   int fromserver;
767   int fromurlinfo_lang;
768   int fromurlinfo_type;
769   int fromurlinfo;
770 } UDM_SQL_CONDITION_PARAM;
771 
772 
773 static void
UdmSQLConditionParamInit(UDM_SQL_CONDITION_PARAM * param)774 UdmSQLConditionParamInit(UDM_SQL_CONDITION_PARAM *param)
775 {
776   UdmDSTRInit(&param->from, 64);
777   UdmDSTRInit(&param->lang, 64);
778   UdmDSTRInit(&param->seed, 64);
779   UdmDSTRInit(&param->server, 64);
780   UdmDSTRInit(&param->site, 64);
781   UdmDSTRInit(&param->status, 64);
782   UdmDSTRInit(&param->tag, 64);
783   UdmDSTRInit(&param->timecond, 64);
784   UdmDSTRInit(&param->type, 64);
785   UdmDSTRInit(&param->ue, 64);
786   UdmDSTRInit(&param->url, 64);
787   UdmDSTRInit(&param->urlinfo, 64);
788   UdmDateParamInit(&param->datep);
789   param->fromserver= 1;
790   param->fromurlinfo_lang= 1;
791   param->fromurlinfo_type= 1;
792   param->fromurlinfo= 1;
793 }
794 
795 
796 static void
UdmSQLConditionParamFree(UDM_SQL_CONDITION_PARAM * param)797 UdmSQLConditionParamFree(UDM_SQL_CONDITION_PARAM *param)
798 {
799   UdmDSTRFree(&param->from);
800   UdmDSTRFree(&param->lang);
801   UdmDSTRFree(&param->seed);
802   UdmDSTRFree(&param->server);
803   UdmDSTRFree(&param->site);
804   UdmDSTRFree(&param->status);
805   UdmDSTRFree(&param->tag);
806   UdmDSTRFree(&param->timecond);
807   UdmDSTRFree(&param->type);
808   UdmDSTRFree(&param->ue);
809   UdmDSTRFree(&param->url);
810   UdmDSTRFree(&param->urlinfo);
811 }
812 
813 
814 static size_t
UdmSQLConditionParamTotalLength(const UDM_SQL_CONDITION_PARAM * CondParam)815 UdmSQLConditionParamTotalLength(const UDM_SQL_CONDITION_PARAM *CondParam)
816 {
817   return
818     UdmDSTRLength(&CondParam->from)     +
819     UdmDSTRLength(&CondParam->lang)     +
820     UdmDSTRLength(&CondParam->seed)     +
821     UdmDSTRLength(&CondParam->server)   +
822     UdmDSTRLength(&CondParam->site)     +
823     UdmDSTRLength(&CondParam->status)   +
824     UdmDSTRLength(&CondParam->tag)      +
825     UdmDSTRLength(&CondParam->timecond) +
826     UdmDSTRLength(&CondParam->type)     +
827     UdmDSTRLength(&CondParam->ue)       +
828     UdmDSTRLength(&CondParam->url)      +
829     UdmDSTRLength(&CondParam->urlinfo);
830 }
831 
832 
833 static udm_rc_t
UdmSQLConditionParamAdd(UDM_SQL_CONDITION_PARAM * CondParam,UDM_AGENT * A,UDM_DB * db,const char * var,size_t varlen,const char * val,size_t vallen)834 UdmSQLConditionParamAdd(UDM_SQL_CONDITION_PARAM *CondParam,
835                         UDM_AGENT *A, UDM_DB *db,
836                         const char *var, size_t varlen,
837                         const char *val, size_t vallen)
838 {
839   char varbuf[64];
840   char valbuf[128 + 1];
841 
842   if (!vallen || varlen > (int) sizeof(varbuf))
843     return UDM_OK;
844 
845   if (vallen > (int) sizeof(valbuf) - 1)
846   {
847     vallen= sizeof(valbuf) - 1;
848     if (UdmFindStringParam(var))
849     {
850       udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db),
851                    "Limit is too long: %.*s=%.*s",
852                    (int) varlen, var, (int) vallen, val);
853       return UDM_ERROR;
854     }
855   }
856 
857   /* Protection against SQL injection */
858   var= UdmDBSQLEscStrSimple(A, db, varbuf, var, varlen);
859   val= UdmDBSQLEscStrSimple(A, db, valbuf, val, vallen);
860 
861   if (!strcmp(var, "status"))
862     UdmSQLWhereIntParam(db, &CondParam->status, "url.status", val);
863 
864   if (!strcmp(var, "seed"))
865     UdmSQLWhereIntParam(db, &CondParam->seed, "url.seed", val);
866 
867   if (!strcmp(var, "site") && val[0] != 0)
868     UdmSQLWhereStrParam(db, &CondParam->site, "url.url", val, UDM_ADD_PARAM_WITH_TAIL_PERCENT);
869 
870   if(!strcmp(var,"ul"))
871     UdmSQLWhereStrParam(db, &CondParam->url, "url.url", val, UDM_ADD_PARAM_URL);
872 
873   if(!strcmp(var,"ue"))
874     UdmSQLWhereStrParam(db, &CondParam->ue, "url.url", val, UDM_ADD_PARAM_URL_NEG);
875 
876   if(!strcmp(var,"u"))
877     UdmSQLWhereStrParam(db, &CondParam->url, "url.url", val, 0);
878 
879   if(!strcmp(var,"tag") || !strcmp(var,"t"))
880   {
881     UdmSQLWhereStrParam(db, &CondParam->tag, "s.tag", val, 0);
882     if (CondParam->fromserver)
883     {
884       CondParam->fromserver= 0;
885       UdmDSTRAppendSTR(&CondParam->from, ", server s");
886       UdmDSTRAppendSTR(&CondParam->server, " AND s.rec_id=url.server_id");
887     }
888   }
889 
890   if(!strcmp(var,"lang") || !strcmp(var,"g"))
891   {
892     UdmSQLWhereStrParam(db, &CondParam->lang, "il.sval", val, 0);
893     if (CondParam->fromurlinfo_lang)
894     {
895       CondParam->fromurlinfo_lang= 0;
896       UdmDSTRAppendSTR(&CondParam->from, ", urlinfo il");
897       UdmDSTRAppendSTR(&CondParam->server, " AND il.url_id=url.rec_id AND il.sname='Content-Language'");
898     }
899   }
900 
901   if(!strncmp(var, "sl.", 3))
902   {
903     UdmSQLWhereAddJoiner(&CondParam->urlinfo, " AND ");
904     UdmDSTRAppendf(&CondParam->urlinfo,"isl%d.sname='%s' AND isl%d.sval LIKE '%s')",
905                    CondParam->fromurlinfo, var + 3, CondParam->fromurlinfo, val);
906     UdmDSTRAppendf(&CondParam->from, ", urlinfo isl%d", CondParam->fromurlinfo);
907     UdmDSTRAppendf(&CondParam->server, " AND isl%d.url_id=url.rec_id",
908                    CondParam->fromurlinfo);
909     CondParam->fromurlinfo++;
910   }
911 
912   if (!strcmp(var,"type") || !strcmp(var, "typ"))
913   {
914     /*
915        "type" is a reserved word in ASP,
916        so "typ" is also added as a workaround
917     */
918     UdmSQLWhereStrParam(db, &CondParam->type, "it.sval", val, 0);
919     if (CondParam->fromurlinfo_type)
920     {
921       CondParam->fromurlinfo_type= 0;
922       UdmDSTRAppendSTR(&CondParam->from, ", urlinfo it");
923       UdmDSTRAppendSTR(&CondParam->server, " AND it.url_id=url.rec_id AND it.sname='Content-Type'");
924     }
925   }
926   UdmCheckDateParam(&CondParam->datep, var, val);
927   return UDM_OK;
928 }
929 
930 
931 static udm_rc_t
UdmSQLConditionParamPopulate(UDM_SQL_CONDITION_PARAM * CondParam,UDM_AGENT * A,UDM_DB * db,const UDM_VARLIST * Vars)932 UdmSQLConditionParamPopulate(UDM_SQL_CONDITION_PARAM *CondParam,
933                              UDM_AGENT *A, UDM_DB *db,
934                              const UDM_VARLIST *Vars)
935 {
936   size_t i;
937   for (i= 0; i < Vars->nvars; i++)
938   {
939     const UDM_VAR *Var= UdmVarListFindConstByIndex(Vars, i);
940     const char *var= UdmVarName(Var) ? UdmVarName(Var) : "";
941     UDM_CONST_STR valbuf, *val= UdmVarGetConstStr(Var, &valbuf);
942     size_t varlen= strlen(var);
943     udm_rc_t rc;
944 
945     if (UDM_OK != (rc= UdmSQLConditionParamAdd(CondParam, A, db,
946                                                var, varlen,
947                                                val->str, val->length)))
948       return rc;
949   }
950   UdmSQLWhereDateParam(&CondParam->timecond, &CondParam->datep);
951   return UDM_OK;
952 }
953 
954 
955 static udm_rc_t
UdmSQLBuildWhereCondition(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,char const ** dst)956 UdmSQLBuildWhereCondition(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
957                           char const **dst)
958 {
959   UDM_ENV *Conf= A->Conf;
960   size_t nbytes;
961   UDM_SQL_CONDITION_PARAM CondParam;
962   udm_bool_t need_new= UdmVarListFindBool(&A->Conf->Vars, "delta", UDM_FALSE);
963   udm_rc_t rc= UDM_OK;
964   const char *need_new_str= need_new ? /* TODO34: remove this */
965     "url.rec_id IN (SELECT url_id FROM bdicti WHERE state=1)" : "";
966 
967   UDM_LOCK_CHECK_OWNER(A, UDM_LOCK_DB);
968   if (Query->where)
969   {
970     *dst= Query->where;
971     return UDM_OK;
972   }
973 
974   UdmSQLConditionParamInit(&CondParam);
975 
976   if (UDM_OK != (rc= UdmSQLConditionParamPopulate(&CondParam,
977                                                   A, db, &Conf->Vars)))
978     goto ret;
979 
980   if (!(nbytes= UdmSQLConditionParamTotalLength(&CondParam) + strlen(need_new_str)))
981   {
982     Query->where= (char*) UdmStrdup("");
983     Query->from= (char*) UdmStrdup("");
984     goto ret;
985   }
986   UdmQueryClearLimits(Query);
987   Query->where= (char*) UdmMalloc(nbytes + 100);
988   Query->where[0]= '\0';
989   Query->from= (char*) UdmStrdup(UdmDSTRLength(&CondParam.from) ?
990                                  UdmDSTRPtr(&CondParam.from) : "");
991   if (UdmDSTRLength(&CondParam.url))
992     strcat(Query->where, UdmDSTRPtr(&CondParam.url));
993 
994   WhereConditionDSTRAddAnd(Query->where, &CondParam.lang);
995   WhereConditionDSTRAddAnd(Query->where, &CondParam.seed);
996   WhereConditionDSTRAddAnd(Query->where, &CondParam.site);
997   WhereConditionDSTRAddAnd(Query->where, &CondParam.status);
998   WhereConditionDSTRAddAnd(Query->where, &CondParam.tag);
999   WhereConditionDSTRAddAnd(Query->where, &CondParam.timecond);
1000   WhereConditionDSTRAddAnd(Query->where, &CondParam.type);
1001   WhereConditionDSTRAddAnd(Query->where, &CondParam.ue);
1002   WhereConditionDSTRAddAnd(Query->where, &CondParam.urlinfo);
1003 
1004   if (UdmDSTRLength(&CondParam.server))
1005   {
1006     if (!Query->where[0]) strcat(Query->where, " 1=1 ");
1007     strcat(Query->where, UdmDSTRPtr(&CondParam.server));
1008   }
1009 
1010   WhereConditionAddAnd(Query->where, need_new_str);
1011 
1012   /* Need this for test purposes */
1013   UdmVarListReplaceStr(&Conf->Vars, "WhereCondition", Query->where);
1014 
1015 ret:
1016   *dst= Query->where;
1017   UdmSQLConditionParamFree(&CondParam);
1018   return rc;
1019 }
1020 
1021 
1022 static udm_rc_t
UdmVarListSQLEscape(UDM_AGENT * A,UDM_VARLIST * dst,UDM_VARLIST * src,UDM_DB * db)1023 UdmVarListSQLEscape(UDM_AGENT *A, UDM_VARLIST *dst, UDM_VARLIST *src, UDM_DB *db)
1024 {
1025   size_t i, nbytes= 0;
1026   char *tmp= NULL;
1027   for (i= 0; i < src->nvars; i++)
1028   {
1029     const UDM_VAR *V= UdmVarListFindConstByIndex(src, i);
1030     UDM_CONST_STR valuebuf, *value= UdmVarGetConstStr(V, &valuebuf);
1031     if (nbytes < value->length * 2 + 1)
1032     {
1033       nbytes= value->length * 2 + 1;
1034       tmp= (char*) UdmRealloc(tmp, nbytes);
1035     }
1036     UdmDBSQLEscStr(A, db, tmp, value->str ? value->str : "", value->length); /* doc Section */
1037     UdmVarListAddStr(dst, UdmVarName(V), tmp);
1038   }
1039   UdmFree(tmp);
1040   return UDM_OK;
1041 }
1042 
1043 
1044 /************* Servers ******************************************/
1045 
1046 #define UDM_SERVER_TABLE_COLUMNS \
1047   "rec_id,url,tag,command,weight,ordre,parent,enabled "
1048 
1049 static udm_rc_t
UdmServerInitFromRecord(UDM_SERVER * S,UDM_SQLRES * SQLRes,size_t row)1050 UdmServerInitFromRecord(UDM_SERVER *S, UDM_SQLRES *SQLRes, size_t row)
1051 {
1052   const char *val;
1053   S->site_id= UDM_ATOI(UdmSQLValue(SQLRes, row, 0));
1054   val= UdmSQLValue(SQLRes, row, 1);
1055   if (UDM_OK != UdmMatchSetPattern(&S->Filter.Match, val ? val : ""))
1056     return UDM_ERROR;
1057 
1058   if ((val= UdmSQLValue(SQLRes, row, 2)) && val[0])
1059     UdmVarListReplaceStr(&S->Vars, "Tag", val);
1060 
1061   S->command= *UdmSQLValue(SQLRes, row, 3);
1062   S->weight= UDM_ATOF(UdmSQLValue(SQLRes, row, 4));
1063   S->ordre= UDM_ATOI(UdmSQLValue(SQLRes, row, 5));
1064   S->parent= UDM_ATOI(UdmSQLValue(SQLRes, row, 6));
1065   S->enabled= UDM_TEST(UDM_ATOI(UdmSQLValue(SQLRes, row, 7)));
1066   return UDM_OK;
1067 }
1068 
1069 
1070 static int
UdmServerNeedsUpdate(UDM_SERVER * a,UDM_SERVER * b)1071 UdmServerNeedsUpdate(UDM_SERVER *a, UDM_SERVER *b)
1072 {
1073   /* Note: we don't check "srvinfo" content */
1074   if (a->site_id != b->site_id ||
1075       strcmp(UdmMatchPatternConstStr(&a->Filter.Match),
1076              UdmMatchPatternConstStr(&b->Filter.Match)) ||
1077       a->command != b->command ||
1078       strcmp(UdmVarListFindStr(&a->Vars, "Tag", ""), UdmVarListFindStr(&b->Vars, "Tag", "")) ||
1079       a->weight != b->weight ||
1080       a->ordre != b->ordre ||
1081       a->parent != b->parent ||
1082       a->enabled != b->enabled)
1083     return 1;
1084   return 0;
1085 }
1086 
1087 
1088 static void
UdmFilterInitFromVars(UDM_FILTER * Filter,const UDM_VARLIST * Vars)1089 UdmFilterInitFromVars(UDM_FILTER *Filter, const UDM_VARLIST *Vars)
1090 {
1091   Filter->Match.Param.match_mode= UdmVarListFindInt(Vars, "Match_type", UDM_MATCH_BEGIN);
1092   UdmMatchParamSetCaseInsensitive(&Filter->Match.Param,
1093                                   UDM_TEST(UdmVarListFindInt(Vars, "Case_sense",
1094                                                              UDM_CASE_INSENSITIVE)));
1095   UdmMatchParamSetNegative(&Filter->Match.Param,
1096                            UDM_TEST(UdmVarListFindInt(Vars, "Nomatch", 0)));
1097   Filter->method= UdmMethod(UdmVarListFindStr(Vars, "Method",
1098                                               UdmMethodStr(UDM_METHOD_DEFAULT)));
1099 }
1100 
1101 
1102 static void
UdmFilterToVars(const UDM_FILTER * Filter,UDM_VARLIST * Vars)1103 UdmFilterToVars(const UDM_FILTER *Filter, UDM_VARLIST *Vars)
1104 {
1105   if (Filter->method != UDM_METHOD_DEFAULT)
1106     UdmVarListReplaceStr(Vars, "Method",  UdmMethodStr(Filter->method));
1107   if (Filter->Match.Param.match_mode != UDM_MATCH_BEGIN)
1108     UdmVarListReplaceInt(Vars, "Match_type",  Filter->Match.Param.match_mode);
1109   if (!UdmMatchIsCaseInsensitive(&Filter->Match))
1110     UdmVarListReplaceInt(Vars, "Case_sense",  UDM_CASE_SENSITIVE);
1111   if (UdmMatchIsNegative(&Filter->Match))
1112     UdmVarListReplaceInt(Vars, "Nomatch", 1);
1113 }
1114 
1115 static udm_rc_t
UdmLoadServerTable(UDM_AGENT * Indexer,UDM_SERVERLIST * S,UDM_DB * db)1116 UdmLoadServerTable(UDM_AGENT * Indexer, UDM_SERVERLIST *S, UDM_DB *db)
1117 {
1118   size_t    rows, i, j, jrows;
1119   UDM_SQLRES  SQLRes, SRes;
1120   char    qbuf[1024];
1121   const char *filename= UdmVarListFindStr(UdmSQLDBVars(db), "filename", NULL);
1122   const char *name = (filename && filename[0]) ? filename : "server";
1123   const char *infoname = UdmVarListFindStr(UdmSQLDBVars(db), "srvinfo", "srvinfo");
1124   udm_rc_t rc= UDM_OK;
1125   const char  *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
1126 
1127   udm_snprintf(qbuf,sizeof(qbuf)-1,"SELECT " UDM_SERVER_TABLE_COLUMNS
1128                                    "FROM %s "
1129                                    "WHERE enabled=1 AND parent=%s0%s "
1130                                    "ORDER BY ordre", name, qu, qu);
1131 
1132   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf)))
1133     return rc;
1134 
1135   rows= UdmSQLNumRows(&SQLRes);
1136   for(i= 0; i < rows; i++)
1137   {
1138     UDM_SERVER *Server= Indexer->Conf->Cfg_Srv;
1139 
1140     if (UDM_OK != (rc= UdmServerInitFromRecord(Server, &SQLRes, i)))
1141       goto ex;
1142 
1143     sprintf(qbuf,"SELECT sname,sval FROM %s WHERE srv_id=%s%i%s", infoname, qu, Server->site_id, qu);
1144     if(UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SRes, qbuf)))
1145       return rc;
1146     jrows= UdmSQLNumRows(&SRes);
1147     for(j= 0; j < jrows; j++)
1148     {
1149       const char *sname = UdmSQLValue(&SRes, j, 0);
1150       const char *sval = UdmSQLValue(&SRes, j, 1);
1151       UdmVarListReplaceStr(&Server->Vars, sname, sval);
1152     }
1153     UdmSQLFree(&SRes);
1154 
1155     Server->webspace= (udm_webspace_t) UdmVarListFindInt(&Server->Vars, "Follow", UDM_WEBSPACE_DEFAULT);
1156     UdmFilterInitFromVars(&Server->Filter, &Server->Vars);
1157 
1158     if (Server->command == 'S')
1159     {
1160       UdmServerAdd(Indexer, Server, 0);
1161       if ((Server->Filter.Match.Param.match_mode == UDM_MATCH_BEGIN) &&
1162           (Indexer->flags & UDM_FLAG_ADD_SERVURL))
1163       {
1164         UDM_HREFPARAM  HrefParam;
1165         UdmHrefParamInit(&HrefParam);
1166         HrefParam.server_id= Server->site_id;
1167         HrefParam.hops= (uint4) UdmVarListFindInt(&Server->Vars, "StartHops", 0);
1168         HrefParam.link_source= UDM_LINK_SOURCE_HTDB;
1169         UdmHrefListAddConst(&Indexer->Conf->Hrefs, &HrefParam,
1170                             UdmMatchPatternConstStr(&Server->Filter.Match));
1171       }
1172     }
1173     else
1174     {
1175       char errstr[128];
1176       UDM_CONST_STR str;
1177       UdmConstStrSetStr(&str, UdmMatchPatternConstStr(&Server->Filter.Match));
1178       rc= UdmFilterListAdd(&Indexer->Conf->Filters,
1179                            &Server->Filter.Match.Param,
1180                            Server->Filter.method,
1181                            &str,
1182                            errstr, sizeof(errstr));
1183       if (rc != UDM_OK)
1184       {
1185         udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db),
1186                     "Error while loading ServerTable '%s' at row %d: %s",
1187                     name, (int) i, errstr);
1188         break;
1189       }
1190     }
1191     UdmMatchFreeAndInit(&Server->Filter.Match);
1192   }
1193 ex:
1194   UdmSQLFree(&SQLRes);
1195   return rc;
1196 }
1197 
1198 
1199 static udm_rc_t
UdmServerTableFlush(UDM_AGENT * A,UDM_DB * db)1200 UdmServerTableFlush(UDM_AGENT *A, UDM_DB *db)
1201 {
1202   udm_rc_t rc;
1203   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
1204   char str[128];
1205 
1206   udm_snprintf(str, sizeof(str),  "UPDATE server SET enabled=0 WHERE parent=%s0%s", qu, qu);
1207   rc= UdmDBSQLQuery(A, db, NULL, str);
1208   return rc;
1209 }
1210 
1211 
1212 static udm_rc_t
UdmServerTableUpdateSrvInfo(UDM_AGENT * A,UDM_DB * db,UDM_SERVER * S,char * arg)1213 UdmServerTableUpdateSrvInfo(UDM_AGENT *A, UDM_DB *db, UDM_SERVER *S, char *arg)
1214 {
1215   udm_rc_t rc;
1216   size_t i;
1217   UDM_DSTR d;
1218   const char *E= (UdmSQLDBType(db) == UDM_DB_PGSQL && UdmSQLDBVersion(db) >= 80101) ? "E" :"";
1219   UdmDSTRInit(&d, 64);
1220 
1221   UDM_ASSERT(UdmSQLDBConnected(db)); /* make sure E is set to a correct value */
1222 
1223   UdmDSTRAppendf(&d, "DELETE FROM srvinfo WHERE srv_id=%i", S->site_id);
1224   if (UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, UdmDSTRPtr(&d))))
1225     goto ex;
1226 
1227   for (i= 0; i < S->Vars.nvars; i++)
1228   {
1229     const UDM_VAR *Sec= UdmVarListFindConstByIndex(&S->Vars, i);
1230     if (UdmVarStr(Sec) && UdmVarName(Sec) && strcasecmp(UdmVarName(Sec), "Tag"))
1231     {
1232       UDM_CONST_STR valuebuf, *value= UdmVarGetConstStr(Sec, &valuebuf);
1233       UdmDBSQLEscStr(A, db, arg, value->str, value->length); /* srvinfo */
1234       UdmDSTRReset(&d);
1235       UdmDSTRAppendf(&d, "INSERT INTO srvinfo (srv_id,sname,sval) "
1236                          "VALUES (%i,'%s',%s'%s')",
1237                      S->site_id, UdmVarName(Sec), E, arg);
1238       if(UDM_OK != (rc = UdmDBSQLQuery(A, db, NULL, UdmDSTRPtr(&d))))
1239         goto ex;
1240     }
1241   }
1242 ex:
1243   UdmDSTRFree(&d);
1244   return rc;
1245 }
1246 
1247 
1248 static udm_rc_t
UdmServerTableUpdateWithLock(UDM_AGENT * A,UDM_DB * db,UDM_SERVER * S,const char * buf,char * arg)1249 UdmServerTableUpdateWithLock(UDM_AGENT *A, UDM_DB *db, UDM_SERVER *S,
1250                              const char *buf, char *arg)
1251 {
1252   udm_rc_t rc;
1253   if (UDM_OK != (rc= UdmDBSQLLockOrBegin(A, db, "server WRITE, srvinfo WRITE")) ||
1254       UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, buf)) ||
1255       UDM_OK != (rc= UdmServerTableUpdateSrvInfo(A, db, S, arg)) ||
1256       UDM_OK != (rc= UdmDBSQLUnlockOrCommit(A, db)))
1257     return rc;
1258   return UDM_OK;
1259 }
1260 
1261 
1262 static udm_rc_t
UdmServerTableAdd(UDM_AGENT * A,UDM_SERVERLIST * S,UDM_DB * db)1263 UdmServerTableAdd(UDM_AGENT *A, UDM_SERVERLIST *S, UDM_DB *db)
1264 {
1265   udm_rc_t res= UDM_OK;
1266   int found;
1267   const char  *alias=UdmVarListFindStr(&S->Server->Vars,"Alias",NULL);
1268   const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
1269   size_t    i, len= 0;
1270   char    *buf, *arg;
1271   UDM_VARLIST  *Vars= &S->Server->Vars;
1272   UDM_SERVER Old;
1273 
1274   UdmServerInit(&Old);
1275 
1276   S->Server->site_id= UdmStrHash32(UdmMatchPatternConstStr(&S->Server->Filter.Match));
1277 
1278   for (i=0; i < Vars->nvars; i++)
1279   {
1280     const UDM_VAR *Var= UdmVarListFindConstByIndex(Vars, i);
1281     len= udm_max(len, UdmVarLength(Var));
1282   }
1283 
1284   len+= UdmMatchPatternConstStr(&S->Server->Filter.Match) ?
1285         strlen(UdmMatchPatternConstStr(&S->Server->Filter.Match)) : 0;
1286   len+= alias ? strlen(alias) : 0;
1287   len+= 2048;
1288 
1289   buf = (char*)UdmMalloc(len);
1290   arg = (char*)UdmMalloc(len);
1291   if (buf == NULL || arg == NULL)
1292   {
1293     UDM_FREE(buf);
1294     UDM_FREE(arg);
1295     strcpy(UdmDBSQLError(db), "Out of memory");
1296     UdmSQL(db)->errcode = 1;
1297     return UDM_ERROR;
1298   }
1299 
1300   for (found= 0; ; S->Server->site_id++)
1301   {
1302     UDM_SQLRES SQLRes;
1303     udm_snprintf(buf, len, "SELECT " UDM_SERVER_TABLE_COLUMNS
1304                            "FROM server WHERE rec_id=%s%d%s",
1305                            qu, S->Server->site_id, qu);
1306     if (UDM_OK != (res= UdmDBSQLQuery(A, db, &SQLRes, buf)))
1307       goto ex;
1308 
1309     if (!UdmSQLNumRows(&SQLRes))
1310     {
1311       UdmSQLFree(&SQLRes);
1312       break; /* Not found */
1313     }
1314 
1315     UdmServerInitFromRecord(&Old, &SQLRes, 0);
1316     found= !strcasecmp(UdmMatchPatternConstStr(&S->Server->Filter.Match),
1317                        UdmSQLValue(&SQLRes, 0, 1));
1318     UdmSQLFree(&SQLRes);
1319 
1320     if (found)
1321       break;
1322   }
1323 
1324   if (S->Server->webspace != UDM_WEBSPACE_DEFAULT)
1325     UdmVarListReplaceInt(&S->Server->Vars, "Follow",  S->Server->webspace);
1326   UdmFilterToVars(&S->Server->Filter, &S->Server->Vars);
1327 
1328   UdmDBSQLEscStr(A, db, arg,  /* Server pattern */
1329                  UDM_NULL2EMPTY(UdmMatchPatternConstStr(&S->Server->Filter.Match)),
1330                  strlen(UDM_NULL2EMPTY(UdmMatchPatternConstStr(&S->Server->Filter.Match))));
1331 
1332   if (!found)
1333   {
1334     udm_snprintf(buf, len,
1335                  "INSERT INTO server (rec_id, enabled, tag,"
1336                  " command, parent, ordre, weight, url, pop_weight) "
1337                  " VALUES "
1338                  "(%s%d%s, 1, '%s', '%c', %s%d%s, %d, %f, '%s', 0)",
1339          qu, S->Server->site_id, qu,
1340          UdmVarListFindStr(&S->Server->Vars, "Tag", ""),
1341          S->Server->command,
1342          qu, S->Server->parent, qu,
1343          S->Server->ordre,
1344          S->Server->weight,
1345          arg
1346      );
1347     if (UDM_OK != (res= UdmServerTableUpdateWithLock(A, db, S->Server, buf, arg)))
1348       goto ex;
1349   }
1350   else
1351   {
1352     if (UdmServerNeedsUpdate(S->Server, &Old))
1353     {
1354       udm_snprintf(buf, len,
1355                    "UPDATE server SET enabled=1, tag='%s',"
1356                    "command='%c', parent=%s%i%s, ordre=%d, weight=%f "
1357                    "WHERE rec_id=%s%d%s",
1358                     UdmVarListFindStr(&S->Server->Vars, "Tag", ""),
1359                     S->Server->command,
1360                     qu, S->Server->parent, qu,
1361                     S->Server->ordre,
1362                     S->Server->weight,
1363                     qu, S->Server->site_id, qu);
1364       if (UDM_OK != (res= UdmServerTableUpdateWithLock(A, db, S->Server, buf, arg)))
1365         goto ex;
1366     }
1367   }
1368 
1369   UDM_ASSERT(res == UDM_OK);
1370 
1371 ex:
1372   UDM_FREE(buf);
1373   UDM_FREE(arg);
1374   UdmServerFree(&Old);
1375   return res;
1376 }
1377 
1378 
1379 static udm_rc_t
UdmSrvActionSQL(UDM_AGENT * A,UDM_DB * db,UDM_SERVERLIST * S,udm_srvcmd_t cmd)1380 UdmSrvActionSQL(UDM_AGENT *A, UDM_DB *db, UDM_SERVERLIST *S, udm_srvcmd_t cmd)
1381 {
1382   switch(cmd)
1383   {
1384     case UDM_SRV_ACTION_TABLE:
1385       return UdmLoadServerTable(A,S,db);
1386     case UDM_SRV_ACTION_FLUSH:
1387       return UdmServerTableFlush(A, db);
1388     case UDM_SRV_ACTION_ADD:
1389       return UdmServerTableAdd(A, S, db);
1390   }
1391   UdmLog(A, UDM_LOG_ERROR, "Unsupported Srv Action SQL");
1392   return UDM_ERROR;
1393 }
1394 
1395 
1396 /********** Searching for URL_ID by various parameters ****************/
1397 
1398 static urlid_t
UdmURLIdCacheFind(const UDM_URLID_CACHE * Cache,const char * url)1399 UdmURLIdCacheFind(const UDM_URLID_CACHE *Cache, const char *url)
1400 {
1401   size_t i;
1402   for (i= 0; i < UDM_FINDURL_CACHE_SIZE; i++)
1403   {
1404     if (Cache->url[i] && !strcmp(url, Cache->url[i]))
1405       return Cache->id[i];
1406   }
1407   return 0;
1408 }
1409 
1410 
1411 static udm_rc_t
UdmURLIdCacheAdd(UDM_URLID_CACHE * Cache,const char * url,urlid_t id)1412 UdmURLIdCacheAdd(UDM_URLID_CACHE *Cache, const char *url, urlid_t id)
1413 {
1414   UDM_FREE(Cache->url[Cache->pURLCache]);
1415   if (!(Cache->url[Cache->pURLCache]= (char*) UdmStrdup(url)))
1416     return UDM_ERROR;
1417   Cache->id[Cache->pURLCache]= id;
1418   Cache->pURLCache= (Cache->pURLCache + 1) % UDM_FINDURL_CACHE_SIZE;
1419   return UDM_OK;
1420 }
1421 
1422 
1423 static void
UdmURLIdCacheFree(UDM_URLID_CACHE * Cache)1424 UdmURLIdCacheFree(UDM_URLID_CACHE *Cache)
1425 {
1426   size_t i;
1427   for(i= 0; i < UDM_FINDURL_CACHE_SIZE; i++)
1428     UDM_FREE(Cache->url[i]);
1429 }
1430 
1431 
1432 static udm_rc_t
UdmFindURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)1433 UdmFindURL(UDM_AGENT *Indexer, UDM_DOCUMENT * Doc, UDM_DB *db)
1434 {
1435   UDM_SQLRES  SQLRes;
1436   const char  *url=UdmVarListFindStr(&Doc->Sections,"URL","");
1437   udmhash32_t  id = 0;
1438   udm_rc_t rc= UDM_OK;
1439   udm_bool_t UseCRC32URLId= UdmVarListFindBool(&Indexer->Conf->Vars, "UseCRC32URLId", UDM_FALSE);
1440 
1441   if (UseCRC32URLId)
1442   {
1443     /* Auto generation of rec_id */
1444     /* using CRC32 algorythm     */
1445     id= UdmStrHash32(url);
1446   }
1447   else
1448   {
1449     const char *o;
1450     char *qbuf, *e_url;
1451     size_t i, l, url_length= strlen(url);
1452 
1453     /* Escape URL string */
1454     if ((e_url = (char*)UdmMalloc(l = (8 * url_length + 1))) == NULL ||
1455         (qbuf = (char*)UdmMalloc( l + 100 )) == NULL)
1456     {
1457       UDM_FREE(e_url);
1458       UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory");
1459       return UDM_ERROR;
1460     }
1461     UdmDBSQLEscStr(Indexer, db, e_url, url, url_length);
1462 
1463     if (!(id= UdmURLIdCacheFind(&UdmSQLDB(db)->URLIdCache, e_url)))
1464     {
1465       udm_snprintf(qbuf, l + 100, "SELECT rec_id FROM url WHERE url='%s'",e_url);
1466       if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf)))
1467       {
1468         UDM_FREE(e_url);
1469         UDM_FREE(qbuf);
1470         return rc;
1471       }
1472       for(i=0;i<UdmSQLNumRows(&SQLRes);i++)
1473       {
1474         if((o=UdmSQLValue(&SQLRes,i,0)))
1475         {
1476           id=atoi(o);
1477           break;
1478         }
1479       }
1480       UdmSQLFree(&SQLRes);
1481       rc= UdmURLIdCacheAdd(&UdmSQLDB(db)->URLIdCache, e_url, id);
1482     }
1483     UDM_FREE(e_url);
1484     UDM_FREE(qbuf);
1485   }
1486   UdmVarListReplaceInt(&Doc->Sections, "ID", id);
1487   return  rc;
1488 }
1489 
1490 
1491 static udm_rc_t
UdmFindMessage(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)1492 UdmFindMessage(UDM_AGENT *Indexer, UDM_DOCUMENT * Doc, UDM_DB *db)
1493 {
1494   size_t     i, len;
1495   char     *qbuf;
1496   char     *eid;
1497   UDM_SQLRES  SQLRes;
1498   const char  *message_id=UdmVarListFindStr(&Doc->Sections,"Header.Message-ID",NULL);
1499   udm_rc_t rc;
1500 
1501   if(!message_id)
1502     return UDM_OK;
1503 
1504   len = strlen(message_id);
1505   eid = (char*)UdmMalloc(4 * len + 1);
1506   if (eid == NULL) return UDM_ERROR;
1507   qbuf = (char*)UdmMalloc(4 * len + 128);
1508   if (qbuf == NULL)
1509   {
1510     UDM_FREE(eid);
1511     return UDM_ERROR;
1512   }
1513 
1514   /* Escape URL string */
1515   UdmDBSQLEscStr(Indexer, db, eid, message_id, len); /* Message ID */
1516 
1517   udm_snprintf(qbuf, 4 * len + 128,
1518      "SELECT rec_id FROM url u, urlinfo i WHERE u.rec_id=i.url_id AND i.sname='Message-ID' AND i.sval='%s'", eid);
1519   rc= UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf);
1520   UDM_FREE(qbuf);
1521   UDM_FREE(eid);
1522   if (UDM_OK != rc)
1523     return rc;
1524 
1525   for(i=0;i<UdmSQLNumRows(&SQLRes);i++)
1526   {
1527     const char * o;
1528     if((o=UdmSQLValue(&SQLRes,i,0)))
1529     {
1530       UdmVarListReplaceInt(&Doc->Sections,"ID", UDM_ATOI(o));
1531       break;
1532     }
1533   }
1534   UdmSQLFree(&SQLRes);
1535   return(UDM_OK);
1536 }
1537 
1538 
1539 /********************* Limits ********************/
1540 
1541 
1542 udm_rc_t
UdmLoadSlowLimit(UDM_AGENT * A,UDM_DB * db,UDM_URLID_LIST * list,const char * q)1543 UdmLoadSlowLimit(UDM_AGENT *A, UDM_DB *db, UDM_URLID_LIST *list, const char *q)
1544 {
1545   udm_timer_t ticks= UdmStartTimer();
1546   size_t i;
1547   udm_rc_t rc;
1548   UDM_SQLRES SQLRes;
1549   int exclude= list->exclude;
1550   bzero((void*) list, sizeof(UDM_URLID_LIST));
1551   list->exclude= exclude;
1552   if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, q)))
1553     goto ret;
1554 
1555   if (!(list->nurls= UdmSQLNumRows(&SQLRes)))
1556     goto sqlfree;
1557 
1558   if (!(list->urls= (urlid_t *) UdmMalloc(sizeof(urlid_t) * list->nurls)))
1559   {
1560     rc= UDM_ERROR;
1561     list->nurls= 0;
1562     goto ret;
1563   }
1564   for (i= 0; i < list->nurls; i++)
1565   {
1566     list->urls[i]= atoi(UdmSQLValue(&SQLRes, i, 0));
1567   }
1568 
1569 sqlfree:
1570   UdmLog(A, UDM_LOG_DEBUG, "Limit query retured %d rows: %.2f",
1571          (int) list->nurls, UdmStopTimer(&ticks));
1572   UdmSQLFree(&SQLRes);
1573 ret:
1574   return rc;
1575 }
1576 
1577 
1578 static udm_rc_t
UdmLoadSlowLimitWithSort(UDM_AGENT * A,UDM_DB * db,UDM_URLID_LIST * list,const char * q)1579 UdmLoadSlowLimitWithSort(UDM_AGENT *A, UDM_DB *db,
1580                          UDM_URLID_LIST *list, const char *q)
1581 {
1582   udm_rc_t rc= UdmLoadSlowLimit(A, db, list, q);
1583   if (rc == UDM_OK)
1584     UdmURLIdListSort(list);
1585   return rc;
1586 }
1587 
1588 
1589 static udm_rc_t
UdmSlowLimitLoadForConv(UDM_AGENT * A,UDM_DB * db,UDM_URLID_LIST * fl_urls,const char * fl)1590 UdmSlowLimitLoadForConv(UDM_AGENT *A,
1591                         UDM_DB *db,
1592                         UDM_URLID_LIST *fl_urls,
1593                         const char *fl)
1594 {
1595   udm_rc_t rc= UDM_OK;
1596   udm_timer_t ticks= UdmStartTimer();
1597   char name[64];
1598   const char *q;
1599 
1600   bzero((void*) fl_urls, sizeof(*fl_urls));
1601 
1602   UdmLog(A, UDM_LOG_INFO, "Loading fast limit '%s'", fl);
1603   if ((fl_urls->exclude= (fl[0] == '-')))
1604     fl++;
1605 
1606   udm_snprintf(name, sizeof(name), "Limit.%s", fl);
1607   if (!(q= UdmVarListFindStr(&A->Conf->Vars, name, NULL)))
1608   {
1609     UdmLog(A, UDM_LOG_ERROR, "Limit '%s' not specified", fl);
1610     return UDM_ERROR;
1611   }
1612 
1613   if (UDM_OK != (rc= UdmLoadSlowLimitWithSort(A, db, fl_urls, q)))
1614     return rc;
1615   UdmLog(A, UDM_LOG_DEBUG, "Limit '%s' loaded%s, %d records, %.2f sec",
1616          fl, fl_urls->exclude ? " type=excluding" : "", (int) fl_urls->nurls,
1617          UdmStopTimer(&ticks));
1618   return rc;
1619 }
1620 
1621 
1622 
1623 /******************** Orders ********************************/
1624 
1625 /*
1626   Apply a sorted UserOrder to UDM_URLDATALIST
1627 */
1628 static udm_rc_t
UdmApplyFastOrderToURLDataList(UDM_URLDATALIST * Data,UDM_URL_INT4_LIST * Order)1629 UdmApplyFastOrderToURLDataList(UDM_URLDATALIST *Data,
1630                                UDM_URL_INT4_LIST *Order)
1631 {
1632   UDM_URLDATA *d= Data->Item;
1633   UDM_URLDATA *de= Data->Item + Data->nitems;
1634 
1635   if (!Order->nitems)
1636     return UDM_OK;
1637 
1638   for ( ; d < de; d++)
1639   {
1640     UDM_URL_INT4 *found;
1641     if ((found= (UDM_URL_INT4*) UdmBSearch(&d->url_id,
1642                                            Order->Item,
1643                                            Order->nitems,
1644                                            sizeof(UDM_URL_INT4),
1645                                            (udm_qsort_cmp)UdmCmpURLID)))
1646     {
1647       char buf[64];
1648       sprintf(buf, "%08X", found->param);
1649       d->section= UdmStrdup(buf);
1650     }
1651     else
1652     {
1653       d->section= UdmStrdup("00000001");
1654     }
1655   }
1656   return UDM_OK;
1657 }
1658 
1659 
1660 static udm_rc_t
UdmFastOrderLoadAndApplyToURLDataList(UDM_AGENT * Agent,UDM_DB * db,UDM_URLDATALIST * Data,const char * name,size_t * norder)1661 UdmFastOrderLoadAndApplyToURLDataList(UDM_AGENT *Agent,
1662                                       UDM_DB *db,
1663                                       UDM_URLDATALIST *Data,
1664                                       const char *name,
1665                                       size_t *norder)
1666 {
1667   UDM_URL_INT4_LIST Order;
1668   udm_rc_t rc;
1669 
1670   if ((UDM_OK != (rc= UdmBlobLoadFastOrder(Agent, db, &Order, name))) ||
1671       !Order.nitems)
1672     goto ret;
1673 
1674   rc= UdmApplyFastOrderToURLDataList(Data, &Order);
1675 
1676 ret:
1677   *norder= Order.nitems;
1678   UDM_FREE(Order.Item);
1679   return rc;
1680 }
1681 
1682 
1683 
1684 /******************** URLData *******************************/
1685 
1686 static udm_rc_t
UdmLoadURLDataFromURLForConv(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,const UDM_URLID_LIST * fl_urls)1687 UdmLoadURLDataFromURLForConv(UDM_AGENT *A,
1688                              UDM_DB *db,
1689                              UDM_QUERY *Query,
1690                              const UDM_URLID_LIST *fl_urls)
1691 {
1692   udm_rc_t rc;
1693   udm_timer_t ticks= UdmStartTimer();
1694   char qbuf[4*1024];
1695   udm_bool_t need_url= UDM_TRUE; /* TODO34: remove this variable */
1696   /* TODO34: do not load the "url" column if no url.* sections defined */
1697   const char *urlfield, *urltable;
1698   size_t nbytes, i, j;
1699   UDM_SQLRES  SQLres;
1700   const char *dummy_where;
1701 
1702   bzero((void*) &Query->URLData, sizeof(Query->URLData));
1703   if (UDM_OK != (rc= UdmSQLBuildWhereCondition(A, db, Query, &dummy_where)))
1704     return rc;
1705 
1706   UdmLog(A, UDM_LOG_INFO, "Loading URL list");
1707 
1708   urlfield= need_url ? ", url.url" : "";
1709   urltable= (Query->from && Query->from[0]) ? "url." : "";
1710   udm_snprintf(qbuf, sizeof(qbuf),
1711               "SELECT %srec_id, last_mod_time%s"
1712               " FROM url%s%s%s",
1713               urltable, urlfield, Query->from,
1714               Query->where[0] ? " WHERE " : "", Query->where);
1715 
1716   if (UDM_OK != (rc = UdmDBSQLQuery(A, db, &SQLres, qbuf)))
1717     goto fin;
1718 
1719   Query->URLData.nitems= UdmSQLNumRows(&SQLres);
1720   nbytes= Query->URLData.nitems * sizeof(UDM_URLDATA);
1721   Query->URLData.Item= (UDM_URLDATA*) UdmMalloc(nbytes);
1722 
1723   for (i= 0, j= 0; i < Query->URLData.nitems; i++)
1724   {
1725     UDM_URLDATA *Data= &Query->URLData.Item[j];
1726     urlid_t url_id= UDM_ATOI(UdmSQLValue(&SQLres, i, 0));
1727     if (fl_urls->nurls)
1728     {
1729       void *found= UdmBSearch(&url_id, fl_urls->urls, fl_urls->nurls,
1730                               sizeof(urlid_t), (udm_qsort_cmp)UdmCmpURLID);
1731       if (found && fl_urls->exclude)
1732         continue;
1733       if (!found && !fl_urls->exclude)
1734         continue;
1735     }
1736     Data->url_id= url_id;
1737     Data->score= 0;
1738     Data->per_site= 0;
1739     Data->pop_rank= 0;
1740     Data->site_id= 0;
1741     Data->last_mod_time= UDM_ATOI(UdmSQLValue(&SQLres, i, 1));
1742     Data->url= need_url ? UdmStrdup(UdmSQLValue(&SQLres, i, 2)) : NULL;
1743     Data->section= NULL;
1744     j++;
1745   }
1746   Query->URLData.nitems= j;
1747 
1748   UdmURLDataListSort(&Query->URLData);
1749   UdmSQLFree(&SQLres);
1750 
1751 fin:
1752   UdmLog(A, UDM_LOG_INFO, "URL list loaded: %d documents, %.2f sec",
1753                            (int) Query->URLData.nitems, UdmStopTimer(&ticks));
1754   return rc;
1755 }
1756 
1757 
1758 static udm_rc_t
UdmLoadURLDataFromURLAndSlowLimitForConv(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)1759 UdmLoadURLDataFromURLAndSlowLimitForConv(UDM_AGENT *A,
1760                                          UDM_DB *db,
1761                                          UDM_QUERY *Query)
1762 {
1763   udm_rc_t rc= UDM_OK;
1764   const char *fl= UdmVarListFindStr(&A->Conf->Vars, "fl", NULL);
1765   UDM_URLID_LIST fl_urls;
1766 
1767   bzero((void*)&fl_urls, sizeof(fl_urls));
1768 
1769   if (fl && (UDM_OK != (rc= UdmSlowLimitLoadForConv(A, db, &fl_urls, fl))))
1770     return rc;
1771 
1772   rc= UdmLoadURLDataFromURLForConv(A, db, Query, &fl_urls);
1773 
1774   UDM_FREE(fl_urls.urls);
1775 
1776   return rc;
1777 }
1778 
1779 
1780 /*
1781   Load the section with the given name from the table "urlinfo",
1782   for sorting by section: "s=S&su=section".
1783 */
1784 static udm_rc_t
UdmLoadURLDataFromURLInfoUsingIN(UDM_AGENT * A,UDM_DB * db,UDM_URLDATALIST * DataList,const char * esu)1785 UdmLoadURLDataFromURLInfoUsingIN(UDM_AGENT *A,
1786                                  UDM_DB *db,
1787                                  UDM_URLDATALIST *DataList,
1788                                  const char *esu)
1789 {
1790   udm_rc_t rc= UDM_OK;
1791   size_t offs;
1792   char qbuf[4*1024];
1793 
1794   for (offs= 0; offs < DataList->nitems; offs+= 256)
1795   {
1796     size_t nrows, s, i;
1797     int notfirst= 0;
1798     UDM_SQLRES SQLres;
1799     char *end= qbuf + sprintf(qbuf, "SELECT url_id, sval"
1800                               " FROM urlinfo"
1801                               " WHERE sname='%s' AND url_id IN (", esu);
1802 
1803     for (i= 0; (i < 256) && (offs + i < DataList->nitems); i++)
1804     {
1805       end+= sprintf(end, "%s%i", (notfirst) ? "," : "",
1806                     DataList->Item[offs + i].url_id);
1807       notfirst= 1;
1808     }
1809     end+= sprintf(end, ") ORDER BY url_id");
1810 
1811     if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, qbuf)))
1812       goto fin;
1813 
1814     nrows= UdmSQLNumRows(&SQLres);
1815 
1816     for(i= 0, s= i + offs; i < nrows; s++)
1817     {
1818       if (s == DataList->nitems)
1819         break;
1820       if (DataList->Item[s].url_id != (urlid_t) UDM_ATOI(UdmSQLValue(&SQLres, i, 0)))
1821       {
1822         DataList->Item[s].section= UdmStrdup("");
1823       }
1824       else
1825       {
1826         DataList->Item[s].section= UdmStrdup(UdmSQLValue(&SQLres, i, 1));
1827         i++;
1828       }
1829     }
1830     UdmSQLFree(&SQLres);
1831   }
1832 
1833 fin:
1834   return rc;
1835 }
1836 
1837 
1838 /*
1839   Load URL data from "url" for sorting by:
1840   last_mod_time
1841   url
1842   section
1843 */
1844 static udm_rc_t
UdmLoadURLDataFromURLUsingIN(UDM_AGENT * A,UDM_DB * db,UDM_URLDATALIST * DataList,int flag)1845 UdmLoadURLDataFromURLUsingIN(UDM_AGENT *A,
1846                              UDM_DB *db,
1847                              UDM_URLDATALIST *DataList,
1848                              int flag)
1849 {
1850   int need_url= (flag & (UDM_URLDATA_URL | UDM_URLDATA_SITE));
1851   udm_rc_t rc= UDM_OK;
1852   char qbuf[4*1024];
1853   UDM_SQLRES SQLres;
1854   UDM_STR row[5];
1855   size_t j;
1856   const char *hi_priority= UdmSQLDBType(db) == UDM_DB_MYSQL ? "HIGH_PRIORITY " : " ";
1857 
1858   for (j= 0; j < DataList->nitems; j+= 256)
1859   {
1860     size_t i;
1861     int notfirst = 0;
1862     udm_snprintf(qbuf, sizeof(qbuf),
1863             "SELECT %srec_id,last_mod_time%s"
1864             " FROM url"
1865             " WHERE rec_id IN (",
1866             hi_priority,
1867             need_url ? ",url" : "");
1868     for (i= 0; (i < 256) && (j + i < DataList->nitems); i++)
1869     {
1870       sprintf(UDM_STREND(qbuf), "%s%i", (notfirst) ? "," : "", DataList->Item[j + i].url_id);
1871       notfirst= 1;
1872     }
1873     sprintf(UDM_STREND(qbuf), ") ORDER BY rec_id");
1874     if (UDM_OK != (rc= UdmDBSQLExecDirect(A, db, &SQLres, qbuf)))
1875       goto fin;
1876     for (i= 0; UdmDBSQLFetchRow(A, db, &SQLres, row) == UDM_OK; i++)
1877     {
1878       UDM_URLDATA *D= &DataList->Item[i + j];
1879       if (D->url_id != (urlid_t) UDM_ATOI(row[0].str))
1880       {
1881         UdmLog(A, UDM_LOG_ERROR, "Dat url_id (%d) != SQL url_id (%d)",
1882                D->url_id, UDM_ATOI(row[0].str));
1883       }
1884       D->last_mod_time= UDM_ATOI(row[1].str);
1885       if (need_url)
1886       {
1887         size_t sitelen= UdmAbsoluteURLSiteLength(row[2].str);
1888         D->site_id= UdmHash32(row[2].str, sitelen);
1889         D->url= UdmStrdup(row[2].str);
1890       }
1891       else
1892       {
1893         D->url= NULL;
1894         D->site_id= 0;
1895       }
1896       D->pop_rank= 0;
1897       D->section= NULL;
1898     }
1899     UdmSQLFree(&SQLres);
1900 
1901   }
1902 
1903 fin:
1904   return rc;
1905 }
1906 
1907 
1908 static udm_rc_t
UdmLoadURLDataFromURLUsingLoop(UDM_AGENT * A,UDM_DB * db,UDM_URLDATALIST * DataList,int flag)1909 UdmLoadURLDataFromURLUsingLoop(UDM_AGENT *A, UDM_DB *db,
1910                                UDM_URLDATALIST *DataList, int flag)
1911 {
1912   udm_rc_t rc= UDM_OK;
1913   char qbuf[256];
1914   size_t i;
1915   int need_url= (flag & UDM_URLDATA_URL);
1916   const char *hi_priority= UdmSQLDBType(db) == UDM_DB_MYSQL ? "HIGH_PRIORITY" : "";
1917 
1918   for (i = 0; i < DataList->nitems; i++)
1919   {
1920     UDM_SQLRES SQLres;
1921     UDM_URLDATA *D= &DataList->Item[i];
1922     udm_snprintf(qbuf, sizeof(qbuf),
1923                  "SELECT %s last_mod_time%s"
1924                  " FROM url WHERE rec_id=%i",
1925                  hi_priority,
1926                  need_url ? ",url" : "",
1927                  DataList->Item[i].url_id);
1928     if (UDM_OK != (rc = UdmDBSQLQuery(A, db, &SQLres, qbuf)))
1929       goto fin;
1930     if(UdmSQLNumRows(&SQLres))
1931     {
1932       D->url_id= DataList->Item[i].url_id;
1933       D->site_id= 0;
1934       D->last_mod_time= UDM_ATOI(UdmSQLValue(&SQLres, 0, 0));
1935       D->url= need_url ? UdmStrdup(UdmSQLValue(&SQLres, 0, 1)) : NULL;
1936       D->pop_rank= 0;
1937       D->section= NULL;
1938     }
1939     UdmSQLFree(&SQLres);
1940   }
1941 
1942 fin:
1943   return rc;
1944 }
1945 
1946 
1947 static udm_rc_t
UdmLoadURLDataFromURL(UDM_AGENT * A,UDM_DB * db,UDM_URLDATALIST * DataList,int flag)1948 UdmLoadURLDataFromURL(UDM_AGENT *A, UDM_DB *db,
1949                       UDM_URLDATALIST *DataList, int flag)
1950 {
1951   udm_rc_t rc= UDM_OK;
1952   udm_bool_t use_urlbasicinfo= UdmVarListFindBool(&A->Conf->Vars, "LoadURLBasicInfo", UDM_TRUE);
1953   const char *su= UdmVarListFindStr(&A->Conf->Vars, "su", NULL);
1954 
1955   /* Check that DataList is not empty and is sorted by url_id */
1956   UDM_ASSERT(DataList->nitems);
1957   UDM_ASSERT(DataList->Item[0].url_id <= DataList->Item[DataList->nitems - 1].url_id);
1958 
1959   if (!use_urlbasicinfo)
1960   {
1961     UdmLog(A,UDM_LOG_DEBUG,"Not using basic URL data from url");
1962     UdmURLDataListClearParams(DataList);
1963   }
1964   else if (UdmSQLDBHaveIn(db))
1965   {
1966     UdmLog(A,UDM_LOG_DEBUG,"Trying to load URL data from url");
1967     rc= UdmLoadURLDataFromURLUsingIN(A, db, DataList, flag);
1968   }
1969   else
1970   {
1971     UdmLog(A,UDM_LOG_DEBUG,"Trying to load URL data from url, not using IN");
1972     rc= UdmLoadURLDataFromURLUsingLoop(A, db, DataList, flag);
1973   }
1974 
1975   if ((flag & UDM_URLDATA_SU) && su && su[0])
1976   {
1977     char *esu=su ? UdmDBSQLEscStrSimple(A, db, NULL, su, strlen(su)) : NULL; /* User sort name */
1978     rc= UdmLoadURLDataFromURLInfoUsingIN(A, db, DataList, esu);
1979     UDM_FREE(esu);
1980   }
1981 
1982   return rc;
1983 }
1984 
1985 
1986 
1987 /****************************** User score **************************/
1988 
1989 udm_rc_t
UdmUserScoreListLoad(UDM_AGENT * A,UDM_DB * db,UDM_URL_INT4_LIST * List,const char * q)1990 UdmUserScoreListLoad(UDM_AGENT *A, UDM_DB *db,
1991                      UDM_URL_INT4_LIST *List, const char *q)
1992 {
1993   size_t i;
1994   udm_rc_t rc;
1995   UDM_SQLRES SQLRes;
1996   udm_timer_t ticks= UdmStartTimer();
1997 
1998   bzero((void*) List, sizeof(UDM_URL_INT4_LIST));
1999 
2000   if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, q)))
2001     goto ret;
2002 
2003   if (!(List->nitems= UdmSQLNumRows(&SQLRes)))
2004     goto sqlfree;
2005 
2006   if (2 != UdmSQLNumCols(&SQLRes))
2007   {
2008     udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db),
2009                  "User Score query must return 2 columns, returned %d columns",
2010                  (int) UdmSQLNumCols(&SQLRes));
2011     rc= UDM_ERROR;
2012     UdmSQL(db)->errcode= 1;
2013     goto sqlfree;
2014   }
2015 
2016   if (!(List->Item= (UDM_URL_INT4*) UdmMalloc(sizeof(UDM_URL_INT4) * List->nitems)))
2017   {
2018     rc= UDM_ERROR;
2019     List->nitems= 0;
2020     goto sqlfree;
2021   }
2022   for (i= 0; i < List->nitems; i++)
2023   {
2024     List->Item[i].url_id= atoi(UdmSQLValue(&SQLRes, i, 0));
2025     List->Item[i].param= atoi(UdmSQLValue(&SQLRes, i, 1));
2026   }
2027   UdmSort(List->Item, List->nitems, sizeof(UDM_URL_INT4), (udm_qsort_cmp)UdmCmpURLID);
2028 
2029   UdmLog(A, UDM_LOG_DEBUG,
2030          "UserScore query returned %d columns, %d rows: %.2f",
2031          (int) UdmSQLNumCols(&SQLRes), (int) List->nitems, UdmStopTimer(&ticks));
2032 
2033 
2034 sqlfree:
2035   UdmSQLFree(&SQLRes);
2036 ret:
2037   return rc;
2038 }
2039 
2040 
2041 static udm_rc_t
UdmUserScoreListLoadAndApplyToURLScoreList(UDM_AGENT * Agent,UDM_URLSCORELIST * List,UDM_DB * db,const UDM_QUERY_PARAM * prm)2042 UdmUserScoreListLoadAndApplyToURLScoreList(UDM_AGENT *Agent,
2043                                            UDM_URLSCORELIST *List,
2044                                            UDM_DB *db,
2045                                            const UDM_QUERY_PARAM *prm)
2046 {
2047   char name[128];
2048   const char *us, *query;
2049   UDM_URL_INT4_LIST UserScoreList;
2050   udm_rc_t rc;
2051   udm_timer_t ticks= UdmStartTimer();
2052 
2053   if (!prm->UserScoreFactor ||
2054       !(us= UdmVarListFindStr(&Agent->Conf->Vars, "us", NULL)))
2055     return UDM_OK;
2056 
2057   UdmLog(Agent, UDM_LOG_DEBUG, "Start loading UserScore '%s'", us);
2058 
2059   udm_snprintf(name, sizeof(name), "Score.%s", us);
2060   query= UdmVarListFindStr(&Agent->Conf->Vars, name, NULL);
2061 
2062   if (UDM_OK != (rc= query ?
2063                  UdmUserScoreListLoad(Agent, db, &UserScoreList, query) :
2064                  UdmBlobLoadFastScore(Agent, db, &UserScoreList, us)) ||
2065       !UserScoreList.nitems)
2066     goto ret;
2067 
2068   rc= UdmUserScoreListApplyToURLScoreList(Agent, List, &UserScoreList, prm);
2069 
2070 ret:
2071   UdmLog(Agent, UDM_LOG_DEBUG, "%-30s%.2f (%d docs found)",
2072          "Stop  loading UserScore",  UdmStopTimer(&ticks), (int) UserScoreList.nitems);
2073   UDM_FREE(UserScoreList.Item);
2074   return rc;
2075 }
2076 
2077 
2078 static udm_rc_t
UdmUserSiteScoreListLoadAndApplyToURLDataList(UDM_AGENT * Agent,UDM_URLDATALIST * List,UDM_DB * db,const UDM_QUERY_PARAM * prm)2079 UdmUserSiteScoreListLoadAndApplyToURLDataList(UDM_AGENT *Agent,
2080                                               UDM_URLDATALIST *List,
2081                                               UDM_DB *db,
2082                                               const UDM_QUERY_PARAM *prm)
2083 {
2084   char name[128];
2085   const char *us, *query;
2086   UDM_URL_INT4_LIST UserScoreList;
2087   udm_rc_t rc;
2088 
2089   if (!prm->UserScoreFactor ||
2090       !(us= UdmVarListFindStr(&Agent->Conf->Vars, "ss", NULL)))
2091     return UDM_OK;
2092   udm_snprintf(name, sizeof(name), "SiteScore.%s", us);
2093   if (!(query= UdmVarListFindStr(&Agent->Conf->Vars, name, NULL)))
2094     return UDM_OK;
2095 
2096   if ((UDM_OK != (rc= UdmUserScoreListLoad(Agent, db,
2097                                            &UserScoreList, query))) ||
2098       !UserScoreList.nitems)
2099     goto ret;
2100 
2101   rc= UdmUserScoreListApplyToURLDataList(Agent, List, &UserScoreList, prm);
2102 
2103 ret:
2104   UDM_FREE(UserScoreList.Item);
2105   return rc;
2106 }
2107 
2108 
2109 
2110 /*********************** Creating fast index ******************/
2111 
2112 static udm_rc_t
UdmIndexSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)2113 UdmIndexSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
2114 {
2115   udm_rc_t rc;
2116   if (UDM_OK != (rc= UdmLoadURLDataFromURLAndSlowLimitForConv(Indexer, db, Query)))
2117     return rc;
2118 
2119   rc= UdmSQLDBModeHandler(db)->QueryAction(Indexer, db, Query, UDM_QUERYCMD_INDEX);
2120 
2121   UdmURLDataListFree(&Query->URLData);
2122   return rc;
2123 }
2124 
2125 
2126 static udm_rc_t
UdmStoreWords(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2127 UdmStoreWords(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2128 {
2129   UDM_ASSERT(UdmSQLDBModeHandler(db)->StoreWords != NULL);
2130   return UdmSQLDBModeHandler(db)->StoreWords(Indexer, db, Doc);
2131 }
2132 
2133 
2134 /********************* Inserting/Deleting URLs and Links *******************/
2135 
2136 static udm_rc_t
UdmDeleteRedirects(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2137 UdmDeleteRedirects(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2138 {
2139   char qbuf[128];
2140   urlid_t  id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2141   udm_snprintf(qbuf, sizeof(qbuf),
2142                "DELETE FROM redirect WHERE url_id=%d", id);
2143   return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2144 }
2145 
2146 
2147 static udm_rc_t
UdmStoreRedirects(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc)2148 UdmStoreRedirects(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc)
2149 {
2150   char qbuf[128];
2151   urlid_t  id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2152   udm_rc_t rc;
2153   size_t i;
2154 
2155   if (!Doc->Spider.collect_links_destination)
2156     return UDM_OK;
2157   if (UDM_OK != (rc= UdmDeleteRedirects(A, db, Doc)))
2158     return rc;
2159 
2160   udm_snprintf(qbuf, sizeof(qbuf),
2161                "INSERT INTO redirect (url_id,seed,url) VALUES(%s,%s,%s)",
2162                 UdmDBSQLParamPlaceHolder(db, 1),
2163                 UdmDBSQLParamPlaceHolder(db, 2),
2164                 UdmDBSQLParamPlaceHolder(db, 3));
2165 
2166   if (UDM_OK != (rc= UdmDBSQLPrepare(A, db, qbuf)))
2167     return rc;
2168 
2169   /* UdmDocBaseHref() && UdmDocConvertHrefs() is already done here */
2170   for (i= 0; i < Doc->Hrefs.nhrefs; i++)
2171   {
2172     UDM_HREF *H= &Doc->Hrefs.Href[i];
2173     udmcrc32_t url_seed;
2174 
2175     if (H->Param.link_source != UDM_LINK_SOURCE_REDIRECT)
2176       continue;
2177     if (!(H->Param.method_reason & Doc->Spider.collect_links_destination))
2178       continue;
2179 
2180     url_seed= UdmStrHash32(H->url) & 0xFF;
2181 
2182     /*
2183       TODO34: Remove duplicate links.
2184       Some links can be duplicate here,
2185       because after UdmDocConvertHrefs() different links can become the same:
2186       http://site/  -> http://site/
2187       http://site/? -> http://site/
2188     */
2189     if (UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 1, &id, (int) sizeof(id),
2190                                              UDM_SQLTYPE_INT32)) ||
2191       UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 2, &url_seed, (int) sizeof(url_seed),
2192                                            UDM_SQLTYPE_INT32)) ||
2193       UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 3, H->url, (int) strlen(H->url),
2194                                            UDM_SQLTYPE_VARCHAR)) ||
2195       UDM_OK != (rc= UdmDBSQLExecute(A, db)))
2196     {
2197       break;
2198     }
2199   }
2200   UdmDBSQLStmtFree(A, db);
2201   return rc;
2202 }
2203 
2204 
2205 static udm_rc_t
UdmDeleteLinks(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2206 UdmDeleteLinks(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2207 {
2208   char  qbuf[128];
2209   urlid_t  url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2210   sprintf(qbuf,"DELETE FROM links WHERE url_id=%d", url_id);
2211   return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2212 }
2213 
2214 
2215 static udm_rc_t
UdmStoreLinks(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc)2216 UdmStoreLinks(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc)
2217 {
2218   char qbuf[128];
2219   udm_rc_t rc;
2220   size_t i;
2221   urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2222   udmcrc32_t url_seed= 0;
2223   UDM_CHARSET *cs= Doc->lcs;
2224 
2225   if (!Doc->Spider.collect_links_destination)
2226     return UDM_OK;
2227 
2228   if (UdmSQLDBType(db) == UDM_DB_MYSQL &&
2229       UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, "LOCK TABLE links WRITE")))
2230     return rc;
2231 
2232   if (UDM_OK != (rc= UdmDeleteLinks(A, db, Doc)))
2233     return rc;
2234 
2235   udm_snprintf(qbuf, sizeof(qbuf),
2236                "INSERT INTO links (url_id,url,linktext,src,rel,seed) "
2237                "VALUES(%s,%s,%s,%s,%s,%s)",
2238                 UdmDBSQLParamPlaceHolder(db, 1),
2239                 UdmDBSQLParamPlaceHolder(db, 2),
2240                 UdmDBSQLParamPlaceHolder(db, 3),
2241                 UdmDBSQLParamPlaceHolder(db, 4),
2242                 UdmDBSQLParamPlaceHolder(db, 5),
2243                 UdmDBSQLParamPlaceHolder(db, 6));
2244 
2245   if (UDM_OK != (rc= UdmDBSQLPrepare(A, db, qbuf)))
2246     return rc;
2247 
2248   /* UdmDocBaseHref() && UdmDocConvertHrefs() is already done here */
2249   for (i= 0; i < Doc->Hrefs.nhrefs; i++)
2250   {
2251     UDM_HREF *H= &Doc->Hrefs.Href[i];
2252     UDM_CONST_STR def= {"", 0};
2253     UDM_CONST_STR txt, rel;
2254     const char *link_source= UdmLinkSourceStr(H->Param.link_source);
2255 
2256     if (H->Param.link_source == UDM_LINK_SOURCE_REDIRECT)
2257       continue;
2258     /*
2259       TODO34: Allow to specify link sources in CollectLinks,
2260       with options similar to FollowLinks.
2261     */
2262     if (H->Param.link_source == UDM_LINK_SOURCE_DIR)
2263       continue;
2264     if (!(H->Param.method_reason & Doc->Spider.collect_links_destination))
2265       continue;
2266 
2267     UdmVarListFindConstStr(&txt, &H->HrefVars, "LinkText", &def);
2268     UdmVarListFindConstStr(&rel, &H->HrefVars, "Rel", &def);
2269     txt.length= cs->cset->well_formed_length(cs, txt.str, UDM_MIN(1024, txt.length), UDM_RECODE_HTML);
2270     rel.length= cs->cset->well_formed_length(cs, rel.str, UDM_MIN(32, rel.length), UDM_RECODE_HTML);
2271     url_seed= UdmStrHash32(H->url) & 0xFF;
2272 
2273     /*
2274       TODO34: Remove duplicate links.
2275       Some links can be duplicate here,
2276       because after UdmDocConvertHrefs() different links can become the same:
2277       http://site/  -> http://site/
2278       http://site/? -> http://site/
2279     */
2280     if (UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 1, &url_id, (int) sizeof(url_id),
2281                                              UDM_SQLTYPE_INT32)) ||
2282       UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 2, H->url, (int) strlen(H->url),
2283                                            UDM_SQLTYPE_VARCHAR)) ||
2284       UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 3, txt.str, (int) txt.length,
2285                                            UDM_SQLTYPE_VARCHAR)) ||
2286       UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 4, link_source, (int) strlen(link_source),
2287                                            UDM_SQLTYPE_VARCHAR)) ||
2288       UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 5, rel.str, (int) rel.length,
2289                                            UDM_SQLTYPE_VARCHAR)) ||
2290       UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 6, &url_seed, (int) sizeof(url_seed),
2291                                            UDM_SQLTYPE_INT32)) ||
2292       UDM_OK != (rc= UdmDBSQLExecute(A, db)))
2293     {
2294       break;
2295     }
2296   }
2297   UdmDBSQLStmtFree(A, db);
2298   if (rc== UDM_OK && UdmSQLDBType(db) == UDM_DB_MYSQL)
2299     rc= UdmDBSQLQuery(A, db, NULL, "UNLOCK TABLES");
2300   return rc;
2301 }
2302 
2303 
2304 static udm_rc_t
UdmExportURL(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc,const char * sql_export)2305 UdmExportURL(UDM_AGENT *Indexer,
2306              UDM_DB *db,
2307              UDM_DOCUMENT *Doc,
2308              const char *sql_export)
2309 {
2310   udm_rc_t rc= UDM_OK;
2311   char *part, *lt, *sql_export_copy= UdmStrdup(sql_export);
2312   UDM_DSTR d;
2313   UDM_VARLIST Vars;
2314   UdmVarListInit(&Vars);
2315   UdmDSTRInit(&d,256);
2316 
2317   UdmVarListSQLEscape(Indexer, &Vars, &Doc->Sections, db);
2318   for (part= udm_strtok_r(sql_export_copy, ";", &lt) ;
2319        part ;
2320        part= udm_strtok_r(NULL, ";", &lt))
2321   {
2322     UdmDSTRParse(&d, part, &Vars);
2323     if (UDM_OK!= (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&d))))
2324       break;
2325     UdmDSTRReset(&d);
2326   }
2327   UdmVarListFree(&Vars);
2328   UdmDSTRFree(&d);
2329   UdmFree(sql_export_copy);
2330   return rc;
2331 }
2332 
2333 
2334 static udm_rc_t
UdmAddURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2335 UdmAddURL(UDM_AGENT *Indexer,UDM_DOCUMENT * Doc,UDM_DB *db)
2336 {
2337   char    *e_url, *qbuf;
2338   const char  *url;
2339   int    url_seed;
2340   int    use_crc32_url_id;
2341   int    usehtdburlid;
2342   udm_rc_t rc= UDM_OK;
2343   size_t          len;
2344   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2345   urlid_t rec_id = 0;
2346 
2347   url = UdmVarListFindStr(&Doc->Sections,"URL","");
2348   use_crc32_url_id = !strcasecmp(UdmVarListFindStr(&Indexer->Conf->Vars, "UseCRC32URLId", "no"), "yes");
2349   usehtdburlid = UdmVarListFindInt(&Indexer->Conf->Vars, "UseHTDBURLId", 0);
2350 
2351   len = strlen(url);
2352   e_url = (char*)UdmMalloc(4 * len + 1);
2353   if (e_url == NULL) return UDM_ERROR;
2354   qbuf = (char*)UdmMalloc(4 * len + 512);
2355   if (qbuf == NULL)
2356   {
2357     UDM_FREE(e_url);
2358     return UDM_ERROR;
2359   }
2360 
2361   url_seed = UdmStrHash32(url) & 0xFF;
2362 
2363   /* Escape URL string */
2364   UdmDBSQLEscStr(Indexer, db, e_url, url, len);
2365 
2366   if(use_crc32_url_id || usehtdburlid)
2367   {
2368     /* Auto generation of rec_id */
2369     /* using CRC32 algorythm     */
2370     if (use_crc32_url_id) rec_id = UdmStrHash32(url);
2371     else rec_id = UdmVarListFindInt(&Doc->Sections, "HTDB_URL_ID", 0);
2372 
2373     udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2374                  "(rec_id,url,referrer,hops,crc32,next_index_time,status,seed,bad_since_time,server_id,docsize,last_mod_time,shows) "
2375                  "VALUES (%s%i%s,'%s',%s%i%s,%d,0,%d,0,%d,%d,%s%i%s,%s%i%s,%li,0)",
2376            qu, rec_id, qu,
2377            e_url,
2378            qu, UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0), qu,
2379            UdmVarListFindInt(&Doc->Sections,"Hops",0),
2380            (int)time(NULL),
2381            url_seed, (int)time(NULL),
2382            qu, UdmVarListFindInt(&Doc->Sections, "Server_id", 0), qu,
2383            qu, UdmVarListFindInt(&Doc->Sections, "Content-Length", 0), qu,
2384            UdmHttpDate2Time_t(UdmVarListFindStrNonEmpty(&Doc->Sections, "Last-Modified",
2385                               UdmVarListFindStrNonEmpty(&Doc->Sections, "Date", "")))
2386        );
2387   }else{
2388     /* Use dabatase generated rec_id */
2389     /* It depends on used DBType     */
2390     switch(UdmSQLDBType(db))
2391     {
2392     case UDM_DB_SOLID:
2393     case UDM_DB_ORACLE8:
2394     case UDM_DB_SAPDB:
2395       /* FIXME: Dirty hack for stupid too smart databases
2396        Change this for config parameter checking */
2397 /*      if (strlen(e_url)>UDM_URLSIZE)e_url[UDM_URLSIZE]=0;*/
2398       /* Use sequence next_url_id.nextval */
2399       udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2400                    "(url,referrer,hops,rec_id,crc32,next_index_time,status,seed,bad_since_time,server_id)"
2401                    " VALUES "
2402                    "('%s',%i,%d,next_url_id.nextval,0,%d,0,%d,%d,%i)",
2403         e_url,
2404         UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0),
2405         UdmVarListFindInt(&Doc->Sections,"Hops",0),
2406         (int)time(NULL),
2407          url_seed, (int)time(NULL),
2408          UdmVarListFindInt(&Doc->Sections, "Server_id", 0));
2409       break;
2410     case UDM_DB_MIMER:
2411       udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2412                    "(url,referrer,hops,rec_id,crc32,next_index_time,status,seed,bad_since_time,server_id)"
2413                    " VALUES "
2414                    "('%s',%i,%d,NEXT_VALUE OF rec_id_GEN,0,%d,0,%d,%d,%i)",
2415         e_url,
2416         UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0),
2417         UdmVarListFindInt(&Doc->Sections,"Hops",0),
2418         (int)time(NULL),
2419          url_seed, (int)time(NULL),
2420          UdmVarListFindInt(&Doc->Sections, "Server_id", 0));
2421       break;
2422     case UDM_DB_IBASE:
2423       udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2424                    "(url,referrer,hops,rec_id,crc32,next_index_time,status,seed,bad_since_time,server_id)"
2425                    " VALUES "
2426                    "('%s',%i,%d,GEN_ID(rec_id_GEN,1),0,%d,0,%d,%d,%i)",
2427         e_url,
2428         UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0),
2429         UdmVarListFindInt(&Doc->Sections,"Hops",0),
2430         (int)time(NULL),
2431          url_seed, (int)time(NULL),
2432          UdmVarListFindInt(&Doc->Sections, "Server_id", 0));
2433       break;
2434     case UDM_DB_PGSQL:
2435       if (UdmSQLDBVersion(db) > 90100)
2436       {
2437         /* Use 9.1 syntax: INSERT INTO t1 SELECT ... WHERE EXISTS (SELECT) */
2438         udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2439                      "(url,referrer,hops,crc32,next_index_time,status,seed,bad_since_time,server_id,docsize,last_mod_time,shows)"
2440                      " SELECT "
2441                      "'%s',%s%i%s,%d,0,%d,0,%d,%d,%s%i%s,%s%i%s,%li,0"
2442                      " WHERE NOT EXISTS "
2443                      "(SELECT rec_id FROM url WHERE url='%s')",
2444                      e_url,
2445                      qu, UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0), qu,
2446                      UdmVarListFindInt(&Doc->Sections,"Hops",0),
2447                      (int)time(NULL),
2448                      url_seed, (int)time(NULL),
2449                      qu, UdmVarListFindInt(&Doc->Sections, "Server_id", 0), qu,
2450                      qu, UdmVarListFindInt(&Doc->Sections, "Content-Length", 0), qu,
2451                      UdmHttpDate2Time_t(UdmVarListFindStrNonEmpty(&Doc->Sections, "Last-Modified",
2452                      UdmVarListFindStr(&Doc->Sections, "Date", ""))),
2453                     e_url);
2454         break;
2455       }
2456       /* else fall through */
2457     case UDM_DB_MYSQL: /* MySQL generates itself */
2458     case UDM_DB_VIRT:
2459     case UDM_DB_MSSQL:
2460     case UDM_DB_DB2:
2461     case UDM_DB_SQLITE:
2462     case UDM_DB_ACCESS:
2463     case UDM_DB_CACHE:
2464     case UDM_DB_SYBASE:
2465     case UDM_DB_SQLITE3:
2466     case UDM_DB_MONETDB:
2467       udm_snprintf(qbuf, 4 * len + 512, "INSERT INTO url "
2468                    "(url,referrer,hops,crc32,next_index_time,status,seed,bad_since_time,server_id,docsize,last_mod_time,shows)"
2469                    " VALUES "
2470                    "('%s',%s%i%s,%d,0,%d,0,%d,%d,%s%i%s,%s%i%s,%li,0)",
2471              e_url,
2472              qu, UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0), qu,
2473              UdmVarListFindInt(&Doc->Sections,"Hops",0),
2474              (int)time(NULL),
2475              url_seed, (int)time(NULL),
2476              qu, UdmVarListFindInt(&Doc->Sections, "Server_id", 0), qu,
2477              qu, UdmVarListFindInt(&Doc->Sections, "Content-Length", 0), qu,
2478              UdmHttpDate2Time_t(UdmVarListFindStrNonEmpty(&Doc->Sections, "Last-Modified",
2479              UdmVarListFindStr(&Doc->Sections, "Date", "")))
2480          );
2481     }
2482   }
2483 
2484   /* Exec INSERT now */
2485   if(UDM_OK!=(rc=UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
2486     goto ex;
2487 
2488 ex:
2489 
2490   UDM_FREE(qbuf);
2491   UDM_FREE(e_url);
2492   return rc;
2493 }
2494 
2495 
2496 /******************* Cached Copy *********************/
2497 
2498 #define SQLRESTODOC_COLUMNS \
2499   "rec_id,url,last_mod_time,docsize," \
2500   "next_index_time,referrer,crc32,status"
2501 
2502 static void
SQLResToDoc(UDM_ENV * Conf,UDM_DOCUMENT * D,UDM_SQLRES * sqlres,size_t i)2503 SQLResToDoc(UDM_ENV *Conf, UDM_DOCUMENT *D, UDM_SQLRES *sqlres, size_t i)
2504 {
2505   time_t    last_mod_time;
2506   char    dbuf[UDM_MAXTIMESTRLEN];
2507   const char  *format = UdmVarListFindStr(&Conf->Vars, "DateFormat", "%a, %d %b %Y, %X %Z");
2508   double          pr;
2509 
2510   UdmVarListReplaceStr(&D->Sections,"URL",UdmSQLValue(sqlres,i,1));
2511   UdmVarListReplaceInt(&D->Sections, "URL_ID", UdmStrHash32(UdmSQLValue(sqlres,i,1)));
2512   last_mod_time=atol(UdmSQLValue(sqlres,i,2));
2513   UdmVarListReplaceInt(&D->Sections, "Last-Modified-Timestamp", (int) last_mod_time);
2514   if (strftime(dbuf, sizeof(dbuf), format, localtime(&last_mod_time)) == 0)
2515   {
2516     UdmTime_t2HttpStr(last_mod_time, dbuf, sizeof(dbuf));
2517   }
2518   UdmVarListReplaceStr(&D->Sections,"Last-Modified",dbuf);
2519   UdmVarListReplaceStr(&D->Sections,"Content-Length",UdmSQLValue(sqlres,i,3));
2520   pr= atof(UdmSQLValue(sqlres,i,3)) / 1024;
2521   sprintf(dbuf, "%.2f", pr);
2522   UdmVarListReplaceStr(&D->Sections,"Content-Length-K",dbuf);
2523   last_mod_time=atol(UdmSQLValue(sqlres,i,4));
2524   if (strftime(dbuf, sizeof(dbuf), format, localtime(&last_mod_time)) == 0)
2525   {
2526     UdmTime_t2HttpStr(last_mod_time, dbuf, sizeof(dbuf));
2527   }
2528   UdmVarListReplaceStr(&D->Sections,"Next-Index-Time",dbuf);
2529   UdmVarListReplaceInt(&D->Sections, "Referrer-ID", UDM_ATOI(UdmSQLValue(sqlres,i,5)));
2530   UdmVarListReplaceInt(&D->Sections,"crc32",atoi(UdmSQLValue(sqlres,i,6)));
2531 
2532 #if BAR_COMMA_PERIOD_ORACLE_PROBLEM
2533   {
2534 	char *num= UdmSQLValue(sqlres, i, 8);
2535 	char *comma= strchr(num, ',');
2536 	if (comma)
2537 	  *comma= '.';
2538   }
2539 #endif
2540 
2541   UdmVarListReplaceStr(&D->Sections, "Status", UdmSQLValue(sqlres, i, 7));
2542 }
2543 
2544 
2545 static udm_rc_t
UdmGetURLInfoOneDoc(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2546 UdmGetURLInfoOneDoc(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2547 {
2548   udm_rc_t rc;
2549   char buf[64];
2550   size_t i;
2551   UDM_SQLRES SQLRes;
2552 
2553   udm_snprintf(buf, sizeof(buf), "SELECT sname, sval FROM urlinfo WHERE url_id=%d", UDM_ATOI(UdmVarListFindStr(&Doc->Sections, "ID", "0")));
2554   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, buf)))
2555     return rc;
2556 
2557   for (i= 0; i < UdmSQLNumRows(&SQLRes); i++)
2558   {
2559     const char *sname= UdmSQLValue(&SQLRes, i, 0);
2560     const char *sval= UdmSQLValue(&SQLRes, i, 1);
2561     /*size_t l= UdmSQLLen(&SQLRes, i, 1);*/
2562 
2563     if (!sname)
2564       continue;
2565 
2566     UdmVarListReplaceStr(&Doc->Sections, sname, sval ?  sval : "");
2567   }
2568   UdmSQLFree(&SQLRes);
2569 
2570   return UDM_OK;
2571 }
2572 
2573 
2574 static udm_rc_t
UdmGetCachedCopyOneDoc(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc)2575 UdmGetCachedCopyOneDoc(UDM_AGENT *Indexer, UDM_DB *db, UDM_DOCUMENT *Doc)
2576 {
2577   urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2578   size_t max_doc_size= UdmVarListFindInt(&Indexer->Conf->Vars, "MaxDocSize", UDM_MAXDOCSIZE);
2579   udm_rc_t rc;
2580   char buf[128];
2581   UDM_SQLRES SQLRes;
2582 
2583   udm_snprintf(buf, sizeof(buf),
2584                "SELECT content FROM cachedcopy "
2585                "WHERE url_id=%d",
2586                (int) url_id);
2587 
2588   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, buf)))
2589     return rc;
2590 
2591   if (UdmSQLNumRows(&SQLRes) == 1)
2592   {
2593     const char *sval= UdmSQLValue(&SQLRes, 0, 0);
2594     size_t len= UdmSQLLen(&SQLRes, 0, 0);
2595     udm_timer_t timer= 0;
2596     UdmDocSetFromCachedHTTPResponse(Doc, sval, len, max_doc_size, &timer);
2597   }
2598 
2599   UdmSQLFree(&SQLRes);
2600 
2601   /* TODO34: Get cached copy from the original location
2602   if (unpack_cached_copy && !CachedCopy_found)
2603   {
2604     const char *url= UdmVarListFindStr(&Doc->Sections, "url", NULL);
2605     UdmGetURLSimple(Indexer, Doc, url);
2606   }
2607   */
2608 
2609   return UDM_OK;
2610 }
2611 
2612 
2613 static udm_rc_t
UdmGetCachedCopy(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2614 UdmGetCachedCopy(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc, UDM_DB *db)
2615 {
2616   UDM_SQLRES SQLRes;
2617   char buf[1024];
2618   udm_rc_t rc;
2619   int url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2620 
2621   if (!url_id)
2622     UdmFindURL(Indexer, Doc, db);
2623   url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
2624   udm_snprintf(buf, sizeof(buf),
2625                "SELECT " SQLRESTODOC_COLUMNS
2626                " FROM url WHERE rec_id=%d", url_id);
2627   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, buf)))
2628     return rc;
2629 
2630   if (!UdmSQLNumRows(&SQLRes))
2631   {
2632     UdmSQLFree(&SQLRes);
2633     return UDM_ERROR;
2634   }
2635 
2636   SQLResToDoc(Indexer->Conf, Doc, &SQLRes, 0);
2637   UdmSQLFree(&SQLRes);
2638 
2639   if (UDM_OK != (rc= UdmGetCachedCopyOneDoc(Indexer, db, Doc)))
2640     return rc;
2641 
2642   if (UDM_OK != (rc= UdmGetURLInfoOneDoc(Indexer, db, Doc)))
2643     return rc;
2644 
2645   return UDM_OK;
2646 }
2647 
2648 
2649 /********************** Reindexing "indexer -a" *************************/
2650 
2651 static udm_rc_t
UdmMarkForReindex(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)2652 UdmMarkForReindex(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
2653 {
2654   char    qbuf[1024];
2655   const char  *where;
2656   UDM_SQLRES   SQLRes;
2657   size_t          i, j;
2658   udm_rc_t rc;
2659   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2660   UDM_DSTR buf;
2661 
2662   UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_DB);
2663   if (UDM_OK != (rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where)))
2664     return rc;
2665 
2666   if (UdmSQLDBFlags(db) & UDM_SQL_HAVE_SUBSELECT &&
2667       UdmSQLDBType(db) != UDM_DB_MYSQL)
2668   {
2669     udm_snprintf(qbuf,sizeof(qbuf),"UPDATE url SET next_index_time=%d WHERE rec_id IN (SELECT url.rec_id FROM url%s %s %s)",
2670        (int)time(NULL), Query->from, (where[0]) ? "WHERE" : "", where);
2671     return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2672   }
2673 
2674   udm_snprintf(qbuf, sizeof(qbuf), "SELECT url.rec_id FROM url%s %s %s",
2675                Query->from, (where[0]) ? "WHERE" : "", where);
2676   if(UDM_OK != (rc = UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf))) return rc;
2677 
2678   UdmDSTRInit(&buf, 4096);
2679   if (UdmSQLDBHaveIn(db))
2680   {
2681     for (i = 0; i < UdmSQLNumRows(&SQLRes); i += 512)
2682     {
2683       UdmDSTRReset(&buf);
2684       UdmDSTRAppendf(&buf, "UPDATE url SET next_index_time=%d WHERE rec_id IN (", (int)time(NULL));
2685       for (j = 0; (j < 512) && (i + j < UdmSQLNumRows(&SQLRes)); j++)
2686       {
2687         UdmDSTRAppendf(&buf, "%s%s%s%s", (j) ? "," : "", qu, UdmSQLValue(&SQLRes, i + j, 0), qu);
2688       }
2689       UdmDSTRAppendf(&buf, ")");
2690       if(UDM_OK != (rc = UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&buf))))
2691       {
2692         UdmSQLFree(&SQLRes);
2693 	UdmDSTRFree(&buf);
2694         return rc;
2695       }
2696     }
2697   } else {
2698     for (i = 0; i < UdmSQLNumRows(&SQLRes); i++)
2699     {
2700       UdmDSTRReset(&buf);
2701       UdmDSTRAppendf(&buf, "UPDATE url SET next_index_time=%d WHERE rec_id=%s", (int)time(NULL),  UdmSQLValue(&SQLRes, i, 0));
2702       if(UDM_OK != (rc = UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&buf))))
2703       {
2704         UdmSQLFree(&SQLRes);
2705 	UdmDSTRFree(&buf);
2706         return rc;
2707       }
2708     }
2709   }
2710   UdmDSTRFree(&buf);
2711   UdmSQLFree(&SQLRes);
2712   return UDM_OK;
2713 }
2714 
2715 
2716 /************** Child - for new extensions ****************/
2717 
2718 static udm_rc_t
UdmRegisterChild(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2719 UdmRegisterChild(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db)
2720 {
2721   char  qbuf[1024];
2722   urlid_t  url_id = UdmVarListFindInt(&Doc->Sections,"ID",0);
2723   urlid_t  parent_id = UdmVarListFindInt(&Doc->Sections,"Parent-ID",0);
2724   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2725 
2726   udm_snprintf(qbuf,sizeof(qbuf),"insert into links (ot,k,weight) values(%s%i%s,%s%i%s,0.0)", qu, parent_id, qu, qu, url_id, qu);
2727   return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2728 }
2729 
2730 
2731 /*********************** Update URL ***********************/
2732 
2733 static udm_rc_t
UdmUpdateUrl(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2734 UdmUpdateUrl(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc,UDM_DB *db)
2735 {
2736   char qbuf[256];
2737   urlid_t  url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
2738   int  status=UdmVarListFindInt(&Doc->Sections,"Status",0);
2739   int  prevStatus = UdmVarListFindInt(&Doc->Sections, "PrevStatus", 0);
2740   int  next_index_time=UdmHttpDate2Time_t(UdmVarListFindStr(&Doc->Sections,"Next-Index-Time",""));
2741   udm_rc_t res;
2742   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2743 
2744   if (prevStatus != status && status > 300 && status != 304)
2745     sprintf(qbuf, "UPDATE url SET "
2746             "status=%d,next_index_time=%d,bad_since_time=%d,server_id=%s%i%s"
2747             " WHERE rec_id=%s%i%s",
2748             status, next_index_time, (int)time(NULL),
2749             qu, UdmVarListFindInt(&Doc->Sections, "Server_id",0),
2750             qu, qu, url_id, qu);
2751   else
2752     sprintf(qbuf,"UPDATE url SET "
2753             "status=%d,next_index_time=%d,server_id=%s%i%s"
2754             " WHERE rec_id=%s%i%s",
2755             status, next_index_time,
2756             qu, UdmVarListFindInt(&Doc->Sections, "Server_id",0), qu,
2757             qu, url_id, qu);
2758 
2759   if(UDM_OK!=(res=UdmDBSQLQuery(Indexer, db, NULL, qbuf)))return res;
2760 
2761   /* remove all old broken hrefs from this document to avoid broken link collecting */
2762   return UdmDeleteBadHrefs(Indexer,Doc,db,url_id);
2763 }
2764 
2765 
2766 static udm_rc_t
UdmDocNormalizeContentLanguage(UDM_DOCUMENT * Doc)2767 UdmDocNormalizeContentLanguage(UDM_DOCUMENT *Doc)
2768 {
2769   UDM_VAR    *var;
2770   if ((var= UdmVarListFindVar(&Doc->Sections, "Content-Language")))
2771   {
2772     char language[128];
2773     const char *lang= UdmVarStr(var) ? UdmVarStr(var) :
2774                       UdmVarListFindStr(&Doc->Sections, "DefaultLang", "en");
2775     size_t i, len= udm_snprintf(language, sizeof(language), "%s", lang);
2776     for(i= 0; i < len; i++)
2777       language[i]= tolower(language[i]);
2778     return UdmVarListReplaceStr(&Doc->Sections, "Content-Language", language);
2779   }
2780   return UDM_OK;
2781 }
2782 
2783 
2784 static udm_rc_t
UdmUpdateUrlWithLangAndCharset(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)2785 UdmUpdateUrlWithLangAndCharset(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db)
2786 {
2787   char  *qbuf;
2788   udm_rc_t rc;
2789   const char  *charset;
2790   int    status, prevStatus;
2791   urlid_t         url_id;
2792   char    qsmall[64];
2793   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
2794   int IndexTime= UdmVarListFindInt(&Indexer->Conf->Vars, "IndexTime", 0);
2795 
2796   status = UdmVarListFindInt(&Doc->Sections, "Status", 0);
2797   prevStatus = UdmVarListFindInt(&Doc->Sections, "PrevStatus", 0);
2798   url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
2799 
2800   if (UDM_OK != (rc= UdmDocNormalizeContentLanguage(Doc)))
2801     return rc;
2802 
2803   charset = UdmVarListFindStr(&Doc->Sections, "Charset",
2804             UdmVarListFindStr(&Doc->Sections, "RemoteCharset", "iso-8859-1"));
2805   charset = UdmCharsetCanonicalName(charset);
2806   UdmVarListReplaceStr(&Doc->Sections, "Charset", charset);
2807 
2808   if (prevStatus != status && status > 300 && status != 304)
2809     udm_snprintf(qsmall, 64, ", bad_since_time=%d", (int)time(NULL));
2810   else qsmall[0] = '\0';
2811 
2812   if (IndexTime)
2813   {
2814     if (! prevStatus) udm_snprintf(UDM_STREND(qsmall), 64, ",last_mod_time=%li", time(NULL));
2815   }
2816   else
2817   {
2818     const char *lmsrc= UdmVarListFindStrNonEmpty(&Doc->Sections, "User.Date",
2819                        UdmVarListFindStrNonEmpty(&Doc->Sections, "Last-Modified",
2820                        UdmVarListFindStrNonEmpty(&Doc->Sections, "Date", "")));
2821     udm_snprintf(UDM_STREND(qsmall), 64, ",last_mod_time=%li", UdmHttpDate2Time_t(lmsrc));
2822   }
2823   qbuf=(char*)UdmMalloc(1024);
2824 
2825 
2826   udm_snprintf(qbuf, 1023, "\
2827 UPDATE url SET \
2828 status=%d,\
2829 next_index_time=%d,\
2830 docsize=%d,\
2831 crc32=%d%s, server_id=%s%i%s \
2832 WHERE rec_id=%s%i%s",
2833   status,
2834   (int) UdmHttpDate2Time_t(UdmVarListFindStr(&Doc->Sections,"Next-Index-Time","")),
2835   UdmVarListFindInt(&Doc->Sections,"Content-Length",0),
2836   UdmVarListFindInt(&Doc->Sections,"crc32",0),
2837   qsmall,
2838   qu, UdmVarListFindInt(&Doc->Sections, "Server_id",0), qu,
2839   qu, url_id, qu);
2840 
2841   rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf);
2842   UDM_FREE(qbuf);
2843   return rc;
2844 }
2845 
2846 
2847 static udm_bool_t
UdmCachedCopyPrintCharset(udm_content_type_t ct)2848 UdmCachedCopyPrintCharset(udm_content_type_t ct)
2849 {
2850   switch (ct)
2851   {
2852     case UDM_CONTENT_TYPE_TEXT_PLAIN:
2853     case UDM_CONTENT_TYPE_TEXT_HTML:
2854     case UDM_CONTENT_TYPE_TEXT_XML:
2855     case UDM_CONTENT_TYPE_HTDB:
2856       return UDM_TRUE;
2857     case UDM_CONTENT_TYPE_MESSAGE_RFC822:
2858     case UDM_CONTENT_TYPE_AUDIO_MPEG:
2859     case UDM_CONTENT_TYPE_DOCX:
2860     case UDM_CONTENT_TYPE_TEXT_RTF:
2861     case UDM_CONTENT_TYPE_UNKNOWN:
2862       return UDM_FALSE;
2863   }
2864   UDM_ASSERT(0);
2865   return UDM_FALSE;
2866 }
2867 
2868 
/*
  Insert a cached copy of the document into the "cachedcopy" table
  (columns url_id, ts, content) using a prepared statement.

  The stored "content" value is built in a temporary UDM_HTTPBUF as:
    Content-Type: <parser content type>[; charset=<parser charset>]
    [X-Orig.Content-Type: <original content type>[; charset=<charset>]]
    [Content-Encoding: deflate]
    <empty line>
    <document content, deflated or as is>

  Returns:
    UDM_ERROR when no content type is known or when the DB handler has
              no Prepare method;
    UDM_OK    without storing anything when the content cannot be
              obtained, is empty, or robots rules allow neither
              archiving nor indexing;
    otherwise the first failing code of the allocation or SQL calls,
    or the result of UdmDBSQLStmtFree().
*/
static udm_rc_t
UdmDocInsertCachedCopy(UDM_AGENT *Agent, UDM_DB *db, UDM_DOCUMENT *Doc)
{
  char qbuf[256];
  udm_rc_t rc;
  UDM_CONST_STR content;
  urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
  UDM_HTTPBUF tmp;
  /* "Cached.*" sections take precedence over the original ones */
  const char *orig_content_type= UdmVarListFindStrNonEmpty(&Doc->Sections, "Content-Type", NULL);
  const char *parser_content_type= UdmVarListFindStrNonEmpty(&Doc->Sections, "Cached.Content-Type", orig_content_type);
  udm_content_type_t orig_ct= orig_content_type ? UdmContentTypeByName(orig_content_type) : UDM_CONTENT_TYPE_UNKNOWN;
  udm_content_type_t parser_ct= parser_content_type ? UdmContentTypeByName(parser_content_type) : UDM_CONTENT_TYPE_UNKNOWN;
  /* Charset fallback chain: Cached.Charset -> Charset -> RemoteCharset */
  UDM_CHARSET *remote_charset= UdmVarListFindCharset(&Doc->Sections, "RemoteCharset", NULL);
  UDM_CHARSET *orig_charset= UdmVarListFindCharset(&Doc->Sections, "Charset", remote_charset);
  UDM_CHARSET *parser_charset= UdmVarListFindCharset(&Doc->Sections, "Cached.Charset", orig_charset);
  const char *encoding= UdmVarListFindStr(&Agent->Conf->Vars, "CachedCopyEncoding", "deflate");
  /* Current time; kept in urlid_t so that it can be bound as INT32 below */
  urlid_t ts= (urlid_t) time(0);

  if (!parser_content_type || !UdmSQLDBHandler(db)->Prepare)
    return UDM_ERROR;

  /*UdmVarListPrint(&Doc->Sections, stdout);*/

  /*
    Don't store cached copies if neither indexing nor archiving is allowed.
    TODO34: respect noarchive at search time.
  */
  if (UdmHTTPBufContentToConstStr(&Doc->Buf, &content) || !content.length ||
      (!Doc->Spider.robots.archive && !Doc->Spider.robots.index))
    return UDM_OK;

  /* 256 extra bytes are requested on top of the content length */
  UdmHTTPBufInit(&tmp);
  if (UDM_OK != (rc= UdmHTTPBufAlloc(&tmp, content.length + 256)))
    return rc;

  UdmHTTPBufPrintf(&tmp, "Content-Type: %s", parser_content_type);
  if (parser_charset && UdmCachedCopyPrintCharset(parser_ct))
    UdmHTTPBufAppendf(&tmp, "; charset=%s", parser_charset->name);
  UdmHTTPBufAppendf(&tmp, "\r\n");

  /* Add the original content type if differs from the parser content type */
  if ((orig_content_type && orig_ct != parser_ct) ||
      (orig_charset != NULL && UdmCachedCopyPrintCharset(orig_ct) &&
       orig_charset != parser_charset))

  {
    UdmHTTPBufAppendf(&tmp, "X-Orig.Content-Type: %s", orig_content_type);
    if (orig_charset && UdmCachedCopyPrintCharset(orig_ct))
      UdmHTTPBufAppendf(&tmp, "; charset=%s", orig_charset->name);
    UdmHTTPBufAppendf(&tmp, "\r\n");
  }

  /* Compression is attempted only for content longer than 128 bytes */
  if (encoding && content.length > 128)
  {
    udm_content_encoding_t ce= UdmContentEncodingID(encoding);
    if (ce == UDM_CONTENT_ENCODING_DEFLATE)
    {
      /*
        NOTE(review): the restore below is a plain struct copy; it is only
        valid if the failed append did not reallocate the underlying
        storage - confirm against UdmHTTPBufDeflateAppend().
      */
      UDM_HTTPBUF tmp2= tmp; /* Backup the buffer state */
      UdmHTTPBufAppendf(&tmp, "Content-Encoding: deflate\r\n\r\n");
      if (UDM_OK == UdmHTTPBufDeflateAppend(&tmp, content.str, content.length))
        goto ins;
      tmp= tmp2; /* Deflate failed, restore the buffer state */
    }
  }

  /* Uncompressed fallback: end of headers, then the raw content */
  UdmHTTPBufAppendf(&tmp, "\r\n");
  UdmHTTPBufAppend(&tmp, content.str, content.length);

ins:
  udm_snprintf(qbuf, sizeof(qbuf),
               "INSERT INTO cachedcopy (url_id,ts,content) "
               "VALUES(%s,%s,%s)",
                UdmDBSQLParamPlaceHolder(db, 1),
                UdmDBSQLParamPlaceHolder(db, 2),
                UdmDBSQLParamPlaceHolder(db, 3));

  if (UDM_OK != (rc= UdmDBSQLPrepare(Agent, db, qbuf)))
    goto ex;

  /* url_id and ts are bound as 32-bit integers, hence the size check */
  UDM_ASSERT(sizeof(url_id) == 4);
  if (UDM_OK != (rc= UdmDBSQLBindParameter(Agent, db, 1,
                                           &url_id, (int) sizeof(url_id),
                                           UDM_SQLTYPE_INT32)) ||
      UDM_OK != (rc= UdmDBSQLBindParameter(Agent, db, 2,
                                           &ts, (int) sizeof(ts),
                                           UDM_SQLTYPE_INT32)) ||
      UDM_OK != (rc= UdmDBSQLBindParameter(Agent, db, 3,
                                           UdmHTTPBufPtr(&tmp),
                                           (int) UdmHTTPBufSize(&tmp),
                                           UDM_SQLTYPE_LONGVARBINARY)) ||
      UDM_OK != (rc= UdmDBSQLExecute(Agent, db)))
  {
    /* Release the statement, but report the original failure code */
    UdmDBSQLStmtFree(Agent, db);
    goto ex;
  }

  rc= UdmDBSQLStmtFree(Agent, db);

ex:
  /* Common exit: the temporary buffer is released on every path after Alloc */
  UdmHTTPBufFree(&tmp);
  return rc;
}
2971 
2972 
2973 static udm_bool_t
UdmDocSectionToBeStored(const UDM_ENV * Env,const UDM_VAR * Var)2974 UdmDocSectionToBeStored(const UDM_ENV *Env, const UDM_VAR *Var)
2975 {
2976   const UDM_SECTION *Sec;
2977   if (UdmVarValueHandlerType(Var) != UDM_VALUE_HANDLER_TYPE_STR)
2978     return UDM_FALSE;
2979   Sec= (const UDM_SECTION *) UdmVarConstDataPtr(Var);
2980   return
2981     UdmEnvSectionMaxLengthEx(Env, Sec->Param.secno, Var->header.name) &&
2982     UdmSectionPtr(Sec) && UdmSectionLength(Sec);
2983 }
2984 
2985 
2986 static size_t
UdmDocHaveSectionsToBeStored(const UDM_ENV * Env,const UDM_DOCUMENT * Doc)2987 UdmDocHaveSectionsToBeStored(const UDM_ENV *Env, const UDM_DOCUMENT *Doc)
2988 {
2989   size_t i;
2990   for (i= 0; i < Doc->Sections.nvars; i++)
2991   {
2992     if (UdmDocSectionToBeStored(Env, UdmVarListFindByIndex(&Doc->Sections, i)))
2993       return UDM_TRUE;
2994   }
2995   return UDM_FALSE;
2996 }
2997 
2998 
2999 static udm_rc_t
UdmDocInsertSectionsUsingBind(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc)3000 UdmDocInsertSectionsUsingBind(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc)
3001 {
3002   udm_rc_t rc= UDM_OK;
3003   size_t i;
3004   char qbuf[256];
3005   urlid_t    url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3006 
3007   UDM_ASSERT(UdmSQLDBHandler(db)->Prepare);
3008 
3009   if (!UdmDocHaveSectionsToBeStored(A->Conf, Doc))
3010     return UDM_OK;
3011 
3012   udm_snprintf(qbuf, sizeof(qbuf),
3013                "INSERT INTO urlinfo (url_id,sname,sval) "
3014                "VALUES(%s, %s, %s)",
3015                 UdmDBSQLParamPlaceHolder(db, 1),
3016                 UdmDBSQLParamPlaceHolder(db, 2),
3017                 UdmDBSQLParamPlaceHolder(db, 3));
3018 
3019   if (UDM_OK != (rc= UdmDBSQLPrepare(A, db, qbuf)))
3020     return rc;
3021 
3022   for(i= 0; i< Doc->Sections.nvars; i++)
3023   {
3024     const UDM_VAR *Sec= UdmVarListFindConstByIndex(&Doc->Sections, i);
3025     UDM_CONST_STR valuebuf, *value= UdmVarGetConstStr(Sec, &valuebuf);
3026     if (UdmDocSectionToBeStored(A->Conf, Sec))
3027     {
3028       udm_sqltype_t bindtype= UdmSQLLongVarCharBindType(UdmSQL(db));
3029       UDM_ASSERT(sizeof(url_id) == 4);
3030       if (UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 1,
3031                                                &url_id, (int) sizeof(url_id),
3032                                                UDM_SQLTYPE_INT32)) ||
3033           UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 2,
3034                                                UdmVarName(Sec),
3035                                                (int) UdmVarNameLength(Sec),
3036                                                UDM_SQLTYPE_VARCHAR)) ||
3037           UDM_OK != (rc= UdmDBSQLBindParameter(A, db, 3,
3038                                                value->str, (int) value->length,
3039                                                bindtype)) ||
3040           UDM_OK != (rc= UdmDBSQLExecute(A, db)))
3041         return rc;
3042     }
3043   }
3044 
3045   return UdmDBSQLStmtFree(A, db);
3046 }
3047 
3048 
3049 static udm_rc_t
UdmDocInsertSectionsUsingEscapeBuildQuery(UDM_AGENT * A,UDM_DB * db,const char * table,urlid_t url_id,const char * extra_column_names,const UDM_CONST_STR * values,size_t nvalues,UDM_DSTR * qbuf)3050 UdmDocInsertSectionsUsingEscapeBuildQuery(UDM_AGENT *A, UDM_DB *db,
3051                                           const char *table,
3052                                           urlid_t url_id,
3053                                           const char *extra_column_names,
3054                                           const UDM_CONST_STR *values,
3055                                           size_t nvalues,
3056                                           UDM_DSTR *qbuf)
3057 {
3058   const char *E= (UdmSQLDBDriver(db) == UDM_DBAPI_PGSQL && UdmSQLDBVersion(db) >= 80101) ? "E" : "";
3059   size_t i;
3060   UdmDSTRReset(qbuf);
3061   UdmDSTRAppendf(qbuf, "INSERT INTO %s (url_id,%s) VALUES(",
3062                  table, extra_column_names);
3063   if (url_id)
3064     UdmDSTRAppendf(qbuf, "%d", url_id);
3065   else
3066     UdmDSTRAppendSTR(qbuf, "last_insert_id()");
3067 
3068   for (i= 0; i < nvalues; i++)
3069   {
3070     const UDM_CONST_STR *value= &values[i];
3071     size_t esclen;
3072     UdmDSTRReserve(qbuf, 2 + value->length * (UdmSQLDBType(db) == UDM_DB_PGSQL ? 4 : 2));
3073     UdmDSTRAppendf(qbuf, ",%s'", E);
3074     esclen= UdmDBSQLEscStr(A, db, qbuf->Val.str + qbuf->Val.length,
3075                            value->str, value->length);
3076     qbuf->Val.length+= esclen;
3077     UdmDSTRAppendf(qbuf, "'");
3078   }
3079   UdmDSTRAppend(qbuf, ")", 2);
3080   return UDM_OK;
3081 }
3082 
3083 
3084 static udm_rc_t
UdmDocInsertSectionsUsingEscape(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc,const char * table,urlid_t url_id)3085 UdmDocInsertSectionsUsingEscape(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc,
3086                                 const char *table,
3087                                 urlid_t url_id)
3088 {
3089   udm_rc_t rc= UDM_OK;
3090   size_t i, len, esc_multiply = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? 4 : 2;
3091   UDM_DSTR qbuf;
3092 
3093   /* Calculate maximum arg length */
3094   for(len= 0, i= 0; i < Doc->Sections.nvars; i++)
3095   {
3096     const UDM_VAR *Var= UdmVarListFindConstByIndex(&Doc->Sections, i);
3097     size_t l= UdmVarLength(Var) + UdmVarNameLength(Var);
3098     if (len < l)
3099       len= l;
3100   }
3101   if (!len)
3102     return UDM_OK;
3103 
3104   UdmDSTRInit(&qbuf, 256);
3105   UdmDSTRAlloc(&qbuf, esc_multiply * len + 128);
3106 
3107   for(i= 0; i< Doc->Sections.nvars; i++)
3108   {
3109     const UDM_VAR *Sec= UdmVarListFindConstByIndex(&Doc->Sections, i);
3110     if (UdmDocSectionToBeStored(A->Conf, Sec))
3111     {
3112       UDM_CONST_STR column[2];
3113       UDM_CONST_STR valuebuf, *value= UdmVarGetConstStr(Sec, &valuebuf);
3114       const UDM_CONST_STR *c= (const UDM_CONST_STR*) &column;
3115       UDM_ASSERT(value);
3116       UdmConstStrSetStr(&column[0], UdmVarName(Sec));
3117       column[1]= value[0];
3118       UdmDocInsertSectionsUsingEscapeBuildQuery(A, db, table,
3119                                                 url_id, "sname,sval",
3120                                                 c, 2, &qbuf);
3121       if(UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, UdmDSTRPtr(&qbuf))))
3122         break;
3123     }
3124   }
3125   UdmDSTRFree(&qbuf);
3126   return rc;
3127 }
3128 
3129 
3130 static udm_bool_t
UdmLongUpdateURLUseTnx(UDM_AGENT * A,UDM_DB * db)3131 UdmLongUpdateURLUseTnx(UDM_AGENT *A, UDM_DB *db)
3132 {
3133   switch (UdmSQLDBType(db))
3134   {
3135     case UDM_DB_VIRT:   /* TODO34: check */
3136     case UDM_DB_ACCESS: /* TODO34: check */
3137     case UDM_DB_DB2:    /* TODO34: check */
3138     case UDM_DB_CACHE:  /* TODO34: check */
3139       return UDM_FALSE;
3140     default:
3141     case UDM_DB_MYSQL:
3142       return UDM_TEST(UdmSQLDBFlags(db) & UDM_SQL_HAVE_TRANSACT);
3143   }
3144   return UDM_FALSE;
3145 }
3146 
/*
  Full update of a document in the database. In order:
  - optionally begin a transaction (see UdmLongUpdateURLUseTnx),
  - store words, links and redirects,
  - set "Content-Language" from "DefaultLang" when it is missing,
  - update the "url" record (UdmUpdateUrlWithLangAndCharset),
  - delete bad hrefs of the document,
  - delete old "urlinfo" and "cachedcopy" rows,
  - insert the sections and the cached copy,
  - commit the transaction, if one was started and everything succeeded,
  - in "multi" DB mode, flush the word cache.

  Returns the first non-UDM_OK code encountered.

  NOTE(review): the early "return rc" paths after a successful
  UdmDBSQLBegin() leave this function without a commit; whether the
  transaction is rolled back elsewhere cannot be seen from here.
*/
static udm_rc_t
UdmLongUpdateURL(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc,UDM_DB *db)
{
  udm_rc_t rc= UDM_OK;
  urlid_t url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
  const char *c;
  udm_bool_t use_tnx= UdmLongUpdateURLUseTnx(Indexer, db);
  /*
   TNX works fine: Sybase: ASE-15.0.2 Dev Edition + UnixODBC.
  */

  if (use_tnx && UDM_OK != (rc= UdmDBSQLBegin(Indexer, db)))
    return rc;

  /* Now store words */
  if(UDM_OK != (rc= UdmStoreWords(Indexer, db, Doc)))
    return rc;

  /* Store links */
  if (UDM_OK != (rc= UdmStoreLinks(Indexer, db, Doc)) ||
      UDM_OK != (rc= UdmStoreRedirects(Indexer, db, Doc)))
    return rc;

  /* Copy default languages, if not given by server and not guessed */
  if (!(c= UdmVarListFindStr(&Doc->Sections,"Content-Language",NULL)))
  {
    if ((c= UdmVarListFindStr(&Doc->Sections,"DefaultLang",NULL)))
      UdmVarListReplaceStr(&Doc->Sections,"Content-Language",c);
  }


  if(UDM_OK != (rc= UdmUpdateUrlWithLangAndCharset(Indexer, Doc, db)))
    return rc;

  /* remove all old broken hrefs from this document to avoid broken link collecting */
  if (UDM_OK != (rc= UdmDeleteBadHrefs(Indexer,Doc,db,url_id)))
    return rc;

  /*
    Remove old URLInfo only if PrevStatus != 0.
    The default of 1 means that the rows are also removed when the
    "PrevStatus" section is not set at all.
  */
  if (UdmVarListFindInt(&Doc->Sections, "PrevStatus", 1))
  {
    char    qsmall[128];
    sprintf(qsmall,"DELETE FROM urlinfo WHERE url_id=%i", url_id);
    if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qsmall)))
      return rc;
    sprintf(qsmall,"DELETE FROM cachedcopy WHERE url_id=%i", url_id);
    if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qsmall)))
      return rc;
  }

/* There is no need to delete from links here, it has been done before */

  /* Bound parameters are used when the database can bind text values */
  if (UdmSQLDBFlags(db) & UDM_SQL_HAVE_BIND_TEXT)
  {
    rc= UdmDocInsertSectionsUsingBind(Indexer, db, Doc);
  }
  else
  {
    rc= UdmDocInsertSectionsUsingEscape(Indexer, db, Doc, "urlinfo", url_id);
  }

  if (rc == UDM_OK)
    rc= UdmDocInsertCachedCopy(Indexer, db, Doc);

  if(use_tnx && rc == UDM_OK)
    rc= UdmDBSQLCommit(Indexer, db);

  if (rc == UDM_OK && UdmSQLDBMode(db) == UDM_SQLDBMODE_MULTI)
  {
    /* A missing or non-positive "WordCacheSize" falls back to 0x800000 */
    int WordCacheSize= UdmVarListFindInt(&Indexer->Conf->Vars, "WordCacheSize", 0);
    if (WordCacheSize <= 0) WordCacheSize = 0x800000;
    /* UdmWordCacheWrite starts its own transaction */
    rc= UdmWordCacheWrite(Indexer, db, WordCacheSize);
  }
  return rc;
}
3223 
3224 
3225 static udm_rc_t
UdmUpdateClone(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3226 UdmUpdateClone(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc,UDM_DB *db)
3227 {
3228   udm_rc_t rc;
3229   if (UDM_OK != (rc= UdmDeleteWordFromURL(Indexer, Doc, db)))
3230     return rc;
3231   rc= UdmUpdateUrlWithLangAndCharset(Indexer, Doc, db);
3232   return rc;
3233 }
3234 
3235 
3236 
3237 /************************ Clones stuff ***************************/
3238 static udm_rc_t
UdmFindOrigin(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3239 UdmFindOrigin(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db)
3240 {
3241   size_t    i=0;
3242   char    qbuf[256]="";
3243   UDM_SQLRES  SQLRes;
3244   urlid_t    origin_id = 0;
3245   int    scrc32=UdmVarListFindInt(&Doc->Sections,"crc32",0);
3246   udm_rc_t rc;
3247 
3248   if (scrc32==0)return UDM_OK;
3249 
3250   if (UdmSQLDBHaveIn(db))
3251     sprintf(qbuf,"SELECT rec_id FROM url WHERE crc32=%d AND status IN (200,304,206)",scrc32);
3252   else
3253     sprintf(qbuf,"SELECT rec_id FROM url WHERE crc32=%d AND (status=200 OR status=304 OR status=206)",scrc32);
3254 
3255   if(UDM_OK!=(rc=UdmDBSQLQuery(Indexer,db,&SQLRes,qbuf)))
3256     return rc;
3257 
3258   for(i=0;i<UdmSQLNumRows(&SQLRes);i++)
3259   {
3260     const char *o;
3261     if((o=UdmSQLValue(&SQLRes,i,0)))
3262       if((!origin_id) || (origin_id > UDM_ATOI(o)))
3263         origin_id = UDM_ATOI(o);
3264   }
3265   UdmSQLFree(&SQLRes);
3266   UdmVarListReplaceInt(&Doc->Sections, "Origin-ID", origin_id);
3267   return(UDM_OK);
3268 }
3269 
3270 
3271 static udm_rc_t
UdmCloneListSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc,UDM_RESULT * Res)3272 UdmCloneListSQL(UDM_AGENT * Indexer, UDM_DB *db, UDM_DOCUMENT *Doc, UDM_RESULT *Res)
3273 {
3274   size_t    i, nr, nadd;
3275   char    qbuf[256];
3276   UDM_SQLRES  SQLres;
3277   int    scrc32=UdmVarListFindInt(&Doc->Sections,"crc32",0);
3278   urlid_t    origin_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3279   udm_rc_t rc;
3280   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
3281   const char  *format = UdmVarListFindStr(&Indexer->Conf->Vars, "DateFormat", "%a, %d %b %Y, %X %Z");
3282 
3283   if (UdmResultNumRows(Res) > 4) return UDM_OK;
3284 
3285   if (!scrc32)
3286     return UDM_OK;
3287 
3288   sprintf(qbuf,"SELECT rec_id,url,last_mod_time,docsize FROM url WHERE crc32=%d AND (status=200 OR status=304 OR status=206) AND rec_id<>%s%i%s", scrc32, qu, origin_id, qu);
3289   if (UDM_OK!= (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
3290     return UDM_OK;
3291 
3292   nr = UdmSQLNumRows(&SQLres);
3293   if( nr == 0)
3294   {
3295     UdmSQLFree(&SQLres);
3296     return UDM_OK;
3297   }
3298   nadd = 5 - UdmResultNumRows(Res);
3299   if(nr < nadd) nadd = nr;
3300 
3301   Res->Doc= (UDM_DOCUMENT*)UdmRealloc(Res->Doc,
3302                                       (UdmResultNumRows(Res) + nadd) *
3303                                       sizeof(UDM_DOCUMENT));
3304   for(i = 0; i < nadd; i++)
3305   {
3306     time_t    last_mod_time;
3307     char    buf[UDM_MAXTIMESTRLEN];
3308     UDM_DOCUMENT  *D = &Res->Doc[Res->num_rows + i];
3309 
3310     UdmDocInit(D);
3311     UdmVarListAddInt(&D->Sections, "ID", UDM_ATOI(UdmSQLValue(&SQLres,i,0)));
3312     UdmVarListAddStr(&D->Sections,"URL",UdmSQLValue(&SQLres,i,1));
3313     UdmVarListReplaceInt(&D->Sections, "URL_ID", UdmStrHash32(UdmSQLValue(&SQLres,i,1)));
3314     last_mod_time=atol(UdmSQLValue(&SQLres,i,2));
3315     if (strftime(buf, sizeof(buf), format, localtime(&last_mod_time)) == 0)
3316     {
3317       UdmTime_t2HttpStr(last_mod_time, buf, sizeof(buf));
3318     }
3319     UdmVarListAddStr(&D->Sections,"Last-Modified",buf);
3320     UdmVarListAddInt(&D->Sections,"Content-Length",atoi(UdmSQLValue(&SQLres,i,3)));
3321     UdmVarListAddInt(&D->Sections,"crc32",scrc32);
3322     UdmVarListAddInt(&D->Sections, "Origin-ID", origin_id);
3323   }
3324   Res->num_rows += nadd;
3325   UdmSQLFree(&SQLres);
3326   return UDM_OK;
3327 }
3328 
3329 
3330 static udm_rc_t
UdmQueryClones(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)3331 UdmQueryClones(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
3332 {
3333   size_t i;
3334   udm_rc_t rc= UDM_OK;
3335   for (i= 0; i < UdmResultNumRows(&Query->Res) && rc == UDM_OK; i++)
3336   {
3337     UDM_RESULT Cl;
3338     UdmResultInit(&Cl);
3339     if (UDM_OK == (rc= UdmCloneListSQL(A, db, &Query->Res.Doc[i], &Cl)))
3340     {
3341       size_t c;
3342       UdmVarListReplaceInt(&Query->Res.Doc[i].Sections, "nclones",
3343                            UdmResultNumRows(&Cl));
3344       for (c= 0; c < UdmResultNumRows(&Cl); c++)
3345       {
3346         char name[32];
3347         sprintf(name, "Clone%d", (int) c);
3348         UdmVarListReplaceLst(&Query->Res.Doc[i].Sections,
3349                              &Cl.Doc[c].Sections, name, "*");
3350       }
3351     }
3352     UdmResultFree(&Cl);
3353   }
3354   return rc;
3355 }
3356 
3357 
3358 /************** Get Target to be indexed ***********************/
3359 
3360 
3361 
3362 static void
UdmSQLTopInit(UDM_SQL_TOP_CLAUSE * Top)3363 UdmSQLTopInit(UDM_SQL_TOP_CLAUSE *Top)
3364 {
3365   Top->rownum[0]= 0;
3366   Top->limit[0]= 0;
3367   Top->top[0]= 0;
3368 }
3369 
3370 
3371 
3372 void
UdmSQLTopClause(UDM_SQL * db,size_t top_num,UDM_SQL_TOP_CLAUSE * Top)3373 UdmSQLTopClause(UDM_SQL *db, size_t top_num, UDM_SQL_TOP_CLAUSE *Top)
3374 {
3375   UdmSQLTopInit(Top);
3376   if (db->flags & UDM_SQL_HAVE_LIMIT)
3377   {
3378     udm_snprintf(Top->limit, UDM_SQL_TOP_BUF_SIZE, " LIMIT %d", (int) top_num);
3379   }
3380   else if (db->flags & UDM_SQL_HAVE_TOP)
3381   {
3382     udm_snprintf(Top->top, UDM_SQL_TOP_BUF_SIZE, " TOP %d ", (int) top_num);
3383   }
3384   else if (db->flags & UDM_SQL_HAVE_FIRST_SKIP)
3385   {
3386     udm_snprintf(Top->top, UDM_SQL_TOP_BUF_SIZE, " FIRST %d ", (int) top_num);
3387   }
3388   else if (db->DBType == UDM_DB_ORACLE8)
3389   {
3390 #if HAVE_ORACLE8
3391     if(db->DBDriver == UDM_DBAPI_ORACLE8)
3392     {
3393       udm_snprintf(Top->rownum, UDM_SQL_TOP_BUF_SIZE,
3394                    " AND ROWNUM<=%d", (int) top_num);
3395     }
3396 #endif
3397     if(!Top->rownum[0])
3398       udm_snprintf(Top->rownum, UDM_SQL_TOP_BUF_SIZE,
3399                    " AND ROWNUM<=%d", (int) top_num);
3400   }
3401 }
3402 
3403 
/*
  Column list for selecting "url" records.
  Zero-based positions: 0 url, 1 rec_id, 2 docsize, 3 status, 4 hops,
  5 crc32, 6 last_mod_time, 7 seed.
  NOTE(review): presumably the code reading result rows by column index
  relies on this order - keep the two in sync.
*/
static const  char select_url_str[]=
"url.url,url.rec_id,docsize,status,hops,crc32,last_mod_time,seed";

/*
  The same list extended for dumps with:
  8 next_index_time, 9 bad_since_time, 10 server_id.
*/
static const char select_url_str_for_dump[]=
"url.url,url.rec_id,docsize,status,hops,crc32,last_mod_time,seed,"
"next_index_time,bad_since_time,server_id"
;
3411 
3412 /*
3413   The columns that are dumped:
3414   - status
3415   - docsize
3416   - last_mod_time
3417   - hops
3418   - crc32
3419   - seed
3420   - url
3421   - next_index_time
3422   - bad_since_time
3423   - site_id
3424   - server_id
3425 
3426   The columns that don't need to be dumped for restore purposes:
3427   - rec_id
3428   - shows
3429   - sop_rank
3430   - referrer
3431 */
3432 static udm_rc_t
UdmTargetSQLResDump(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc,UDM_SQLRES * SQLRes,size_t rownum,UDM_DSTR * eurl)3433 UdmTargetSQLResDump(UDM_AGENT *Indexer, UDM_DB *db,
3434                     UDM_DOCUMENT *Doc,
3435                     UDM_SQLRES *SQLRes, size_t rownum,
3436                     UDM_DSTR *eurl)
3437 {
3438   int seed= UDM_ATOI(UdmSQLValue(SQLRes, rownum, 7));
3439   UdmVarListAddInt(&Doc->Sections, "ID", UDM_ATOI(UdmSQLValue(SQLRes,rownum,1)));
3440   printf("--seed=%d\n", seed);
3441   printf("INSERT INTO url ");
3442   printf("(url,docsize,status,hops,crc32,last_mod_time,seed,next_index_time,bad_since_time,server_id) VALUES (");
3443   if (UDM_OK != UdmSQLEscDSTR(UdmSQL(db), eurl,
3444                               UdmSQLValue(SQLRes, rownum, 0),
3445                               UdmSQLLen(SQLRes, rownum, 0)))
3446     return UDM_ERROR;
3447   printf("'%s',", UdmDSTRPtr(eurl));
3448   printf("%s,", UdmSQLValue(SQLRes, rownum, 2));
3449   printf("%s,", UdmSQLValue(SQLRes, rownum, 3));
3450   printf("%s,", UdmSQLValue(SQLRes, rownum, 4));
3451   printf("%s,", UdmSQLValue(SQLRes, rownum, 5));
3452   printf("%s,", UdmSQLValue(SQLRes, rownum, 6));
3453   printf("%s,", UdmSQLValue(SQLRes, rownum, 7));
3454   printf("%s,", UdmSQLValue(SQLRes, rownum, 8));
3455   printf("%s,", UdmSQLValue(SQLRes, rownum, 9));
3456   printf("%s", UdmSQLValue(SQLRes, rownum, 10));
3457   printf(");\n");
3458   return UDM_OK;
3459 }
3460 
3461 
3462 static void
UdmTargetSQLResToDoc(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_SQLRES * SQLRes,size_t i)3463 UdmTargetSQLResToDoc(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,
3464                      UDM_SQLRES *SQLRes, size_t i)
3465 {
3466   char buf[UDM_MAXTIMESTRLEN]= "";
3467   time_t last_mod_time;
3468   UdmVarListAddStr(&Doc->Sections,"URL",UdmSQLValue(SQLRes,i,0));
3469   UdmVarListAddInt(&Doc->Sections, "ID", UDM_ATOI(UdmSQLValue(SQLRes,i,1)));
3470   UdmVarListAddInt(&Doc->Sections,"Content-Length",atoi(UdmSQLValue(SQLRes,i,2)));
3471   UdmVarListAddInt(&Doc->Sections,"Status",atoi(UdmSQLValue(SQLRes,i,3)));
3472   UdmVarListAddInt(&Doc->Sections,"Hops",atoi(UdmSQLValue(SQLRes,i,4)));
3473   UdmVarListAddInt(&Doc->Sections,"crc32",atoi(UdmSQLValue(SQLRes,i,5)));
3474   last_mod_time= (time_t) atol(UdmSQLValue(SQLRes,i,6));
3475   UdmTime_t2HttpStr(last_mod_time, buf, sizeof(buf));
3476   if (last_mod_time != 0 && strlen(buf) > 0)
3477   {
3478     UdmVarListReplaceStr(&Doc->Sections, "Last-Modified", buf);
3479   }
3480 }
3481 
3482 /*
3483   Setting extending sections - only needed for targets
3484 */
3485 static void
UdmTargetSQLResToDoc_Extra(UDM_AGENT * A,UDM_DOCUMENT * Doc,UDM_SQLRES * SQLRes,size_t i)3486 UdmTargetSQLResToDoc_Extra(UDM_AGENT *A, UDM_DOCUMENT *Doc,
3487                            UDM_SQLRES *SQLRes, size_t i)
3488 {
3489   UdmVarListReplaceInt(&Doc->Sections, "URL_ID", UdmStrHash32(UdmSQLValue(SQLRes,i,0)));
3490   UdmVarListAddInt(&Doc->Sections,"PrevStatus",atoi(UdmSQLValue(SQLRes,i,3)));
3491 }
3492 
3493 
3494 static udm_rc_t
UdmTargetsSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)3495 UdmTargetsSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
3496 {
3497   char    sortstr[128]= "";
3498   char    updstr[64]="";
3499   char    tblhint[64]="";
3500   UDM_SQL_TOP_CLAUSE Top;
3501   size_t    i = 0, j, start, nrows, qbuflen;
3502   UDM_SQLRES   SQLRes;
3503   char    smallbuf[128];
3504   udm_rc_t rc= UDM_OK;
3505   const char  *where;
3506   char    *qbuf=NULL;
3507   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
3508   udm_bool_t skip_lock= UdmVarListFindBool(&Indexer->Conf->Vars, "URLSelectSkipLock", UDM_FALSE);
3509   size_t url_num= UdmVarListFindInt(&Indexer->Conf->Vars, "URLSelectCacheSize", URL_SELECT_CACHE);
3510 
3511   UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_TARGETS);
3512   UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_DB);
3513 
3514   if (UdmSQLDB(db)->last_notargets_time == time(0))
3515   {
3516     /*
3517       Do not query the database again if it was already queried
3518       in the same second and returned 0 targets.
3519     */
3520     return UDM_OK;
3521   }
3522 
3523   if (Indexer->Conf->url_number < (int) url_num)
3524     url_num= Indexer->Conf->url_number;
3525   if (UDM_OK != (rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where)))
3526     return rc;
3527   qbuflen= 1024 + 4 * strlen(where);
3528 
3529   if ((qbuf = (char*)UdmMalloc(qbuflen + 2)) == NULL)
3530   {
3531       UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory");
3532       return UDM_ERROR;
3533   }
3534 
3535   if ((Indexer->flags & (UDM_FLAG_SORT_HOPS | UDM_FLAG_SORT_EXPIRED)) ||
3536       !(Indexer->flags & UDM_FLAG_DONTSORT_SEED))
3537   {
3538     sprintf(sortstr, " ORDER BY %s%s%s",
3539       (Indexer->flags & UDM_FLAG_SORT_HOPS) ? "hops" : "",
3540       (Indexer->flags & UDM_FLAG_DONTSORT_SEED) ? "" : ((Indexer->flags & UDM_FLAG_SORT_HOPS) ? ",seed" : "seed"),
3541       (Indexer->flags & UDM_FLAG_SORT_EXPIRED) ?
3542       ( ((Indexer->flags & UDM_FLAG_SORT_HOPS) || !(Indexer->flags & UDM_FLAG_DONTSORT_SEED)  ) ?
3543         ",next_index_time" : "next_index_time") : "");
3544   }
3545 
3546   UdmDBSQLTopClause(Indexer, db, url_num, &Top);
3547 
3548   if(1)
3549   {
3550     switch(UdmSQLDBType(db))
3551     {
3552       case UDM_DB_MYSQL:
3553         udm_snprintf(qbuf, qbuflen,
3554                      "INSERT INTO udm_url_tmp "
3555                      "SELECT url.rec_id FROM url%s "
3556                      "WHERE next_index_time<=%d %s%s%s%s",
3557                       Query->from,
3558                       (int)time(NULL), where[0] ? "AND " : "",  where,
3559                       sortstr, Top.limit);
3560         if (UDM_OK != (rc= UdmDBSQLDropTableIfExists(Indexer, db, "udm_url_tmp")) ||
3561             UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, "CREATE TEMPORARY TABLE udm_url_tmp (rec_id int not null) ENGINE=MyISAM")) ||
3562             (!skip_lock &&
3563              UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, "LOCK TABLES udm_url_tmp WRITE, url WRITE, urlinfo AS it WRITE, urlinfo AS il WRITE, server AS s WRITE"))) ||
3564             UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3565           return rc;
3566         break;
3567       case UDM_DB_PGSQL:
3568         rc= UdmDBSQLQuery(Indexer, db, NULL,"BEGIN WORK");
3569         sprintf(updstr, " FOR UPDATE ");
3570 /*        rc=UdmDBSQLQuery(Indexer, db,NULL,"LOCK url");*/
3571         break;
3572       case UDM_DB_ORACLE8:
3573         sprintf(updstr, " FOR UPDATE ");
3574         break;
3575       case UDM_DB_MSSQL:
3576         strcpy(tblhint, " (TABLOCKX)");
3577         rc= UdmDBSQLBegin(Indexer, db);
3578         break;
3579       case UDM_DB_SAPDB:
3580         sprintf(updstr, " WITH LOCK ");
3581         break;
3582       default:
3583         break;
3584     }
3585     if (rc != UDM_OK)
3586       goto ex;
3587   }
3588 
3589   UdmSQL(db)->res_limit= url_num;
3590   if (UdmSQLDBType(db) == UDM_DB_MYSQL)
3591     udm_snprintf(qbuf, qbuflen, "SELECT %s FROM url, udm_url_tmp "
3592                                 "WHERE url.rec_id=udm_url_tmp.rec_id",
3593                                 select_url_str);
3594   else
3595     udm_snprintf(qbuf, qbuflen, "SELECT %s%s "
3596                                 "FROM url%s%s "
3597                                 "WHERE next_index_time<=%d %s%s%s"
3598                                 "%s%s%s",
3599                  Top.top, select_url_str, tblhint, Query->from,
3600                  (int)time(NULL), where[0] ? "AND " : "",  where, Top.rownum,
3601                  sortstr, updstr, Top.limit);
3602 
3603   if(UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, qbuf)))
3604     goto commit;
3605 
3606   if(!(nrows = UdmSQLNumRows(&SQLRes)))
3607   {
3608     UdmSQLFree(&SQLRes);
3609     UdmSQLDB(db)->last_notargets_time= time(0);
3610     goto commit;
3611   }
3612 
3613   start = Indexer->Conf->Targets.num_rows;
3614   Indexer->Conf->Targets.num_rows += nrows;
3615 
3616   Indexer->Conf->Targets.Doc =
3617     (UDM_DOCUMENT*)UdmRealloc(Indexer->Conf->Targets.Doc, sizeof(UDM_DOCUMENT)*(Indexer->Conf->Targets.num_rows + 1));
3618   if (Indexer->Conf->Targets.Doc == NULL)
3619   {
3620     UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory at realloc %s[%d]", __FILE__, __LINE__);
3621     rc= UDM_ERROR;
3622     goto commit;
3623   }
3624 
3625   for(i = 0; i < nrows; i++)
3626   {
3627     UDM_DOCUMENT  *Doc = &Indexer->Conf->Targets.Doc[start + i];
3628     UdmDocInit(Doc);
3629     UdmTargetSQLResToDoc(Indexer, Doc, &SQLRes, i);
3630     UdmTargetSQLResToDoc_Extra(Indexer, Doc, &SQLRes, i);
3631   }
3632   UdmSQLFree(&SQLRes);
3633 
3634 
3635   if (UdmSQLDBHaveIn(db))
3636   {
3637     char  *urlin=NULL;
3638 
3639     if ( (qbuf = (char*)UdmRealloc(qbuf, qbuflen = qbuflen + 35 * URL_SELECT_CACHE)) == NULL)
3640     {
3641       UDM_FREE(qbuf);
3642       UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory");
3643       rc= UDM_ERROR;
3644       goto commit;
3645     }
3646 
3647     if ( (urlin = (char*)UdmMalloc(35 * URL_SELECT_CACHE)) == NULL)
3648     {
3649       UDM_FREE(qbuf);
3650       UdmLog(Indexer, UDM_LOG_ERROR, "Out of memory");
3651       rc = UDM_ERROR;
3652       goto commit;
3653     }
3654     urlin[0]=0;
3655 
3656     for(i = 0; i < nrows; i+= URL_SELECT_CACHE)
3657     {
3658 
3659       urlin[0] = 0;
3660 
3661       for (j = 0; (j < URL_SELECT_CACHE) && (i + j < nrows) ; j++)
3662       {
3663 
3664       UDM_DOCUMENT  *Doc = &Indexer->Conf->Targets.Doc[start + i + j];
3665       urlid_t    url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3666 
3667       if(urlin[0])strcat(urlin,",");
3668       sprintf(urlin+strlen(urlin), "%s%i%s", qu, url_id, qu);
3669       }
3670       udm_snprintf(qbuf, qbuflen, "UPDATE url SET next_index_time=%d WHERE rec_id in (%s)",
3671              (int)(time(NULL) + URL_LOCK_TIME), urlin);
3672       if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3673         goto commit;
3674     }
3675     UDM_FREE(urlin);
3676   }
3677   else
3678   {
3679     for(i = 0; i < nrows; i++)
3680     {
3681       UDM_DOCUMENT  *Doc = &Indexer->Conf->Targets.Doc[start + i];
3682       urlid_t    url_id = UdmVarListFindInt(&Doc->Sections, "ID", 0);
3683 
3684       udm_snprintf(smallbuf, 128, "UPDATE url SET next_index_time=%d WHERE rec_id=%i",
3685              (int)(time(NULL) + URL_LOCK_TIME), url_id);
3686       if(UDM_OK!=(rc=UdmDBSQLQuery(Indexer, db, NULL, smallbuf)))
3687         goto commit;
3688     }
3689   }
3690 
3691 
3692 commit:
3693 
3694   if (rc != UDM_OK)
3695   {
3696     UdmLog(Indexer, UDM_LOG_ERROR, "UdmTargetsSQL: DB error: %s", UdmDBSQLError(db));
3697   }
3698   if(1)
3699   {
3700     switch(UdmSQLDBType(db))
3701     {
3702       case UDM_DB_MYSQL:
3703         if (!skip_lock)
3704           rc= UdmDBSQLQuery(Indexer, db, NULL, "UNLOCK TABLES");
3705         break;
3706       case UDM_DB_PGSQL:
3707         rc=UdmDBSQLQuery(Indexer, db, NULL, "END WORK");
3708         break;
3709       case UDM_DB_MSSQL:
3710         rc= UdmDBSQLCommit(Indexer, db);
3711       default:
3712         break;
3713     }
3714   }
3715 ex:
3716   UDM_FREE(qbuf);
3717   return rc;
3718 }
3719 
3720 
3721 
3722 /******************* Truncate database ********************/
3723 
3724 static udm_rc_t
UdmTruncateURL(UDM_AGENT * Indexer,UDM_DB * db)3725 UdmTruncateURL(UDM_AGENT *Indexer,UDM_DB *db)
3726 {
3727   udm_rc_t rc;
3728 
3729   rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "url");
3730   if(rc!=UDM_OK)return rc;
3731 
3732   rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "redirect");
3733   if(rc != UDM_OK) return rc;
3734 
3735   rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "links");
3736   if(rc != UDM_OK) return rc;
3737 
3738   rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "cachedcopy");
3739   if(rc != UDM_OK) return rc;
3740 
3741   rc= UdmDBSQLTableTruncateOrDelete(Indexer, db, "urlinfo");
3742   return rc;
3743 }
3744 
3745 
3746 static udm_rc_t
UdmTruncateDict(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)3747 UdmTruncateDict(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
3748 {
3749   UDM_ASSERT(UdmSQLDBModeHandler(db)->QueryAction != NULL);
3750   return UdmSQLDBModeHandler(db)->QueryAction(Indexer, db, Query, UDM_QUERYCMD_CLEAR);
3751 }
3752 
3753 
3754 static udm_rc_t
UdmTruncateDB(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)3755 UdmTruncateDB(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
3756 {
3757   udm_rc_t rc;
3758   if((UDM_OK != (rc= UdmTruncateDict(Indexer, db, Query))) ||
3759      (UDM_OK != (rc= UdmTruncateURL(Indexer, db))))
3760     return rc;
3761   return UDM_OK;
3762 }
3763 
3764 
3765 /******************* Clear database with condition ********/
3766 
3767 static udm_rc_t
UdmDeleteWordsAndLinks(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3768 UdmDeleteWordsAndLinks(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc, UDM_DB *db)
3769 {
3770   udm_rc_t rc;
3771   if (UDM_OK != (rc= UdmDeleteWordFromURL(Indexer,Doc,db)))
3772     return rc;
3773 
3774   if (Doc->Spider.collect_links_destination)
3775   {
3776     int status= UdmVarListFindInt(&Doc->Sections, "Status", 0);
3777     /* In case of redirect, we store links from the "Location" header */
3778     if (UDM_OK != (rc= UdmDeleteLinks(Indexer, db, Doc)))
3779       return rc;
3780 
3781     if (UDM_OK != (rc= (status >= 300 && status <= 303) ?
3782                    UdmStoreRedirects(Indexer, db, Doc) :
3783                    UdmDeleteRedirects(Indexer, db, Doc)))
3784       return rc;
3785   }
3786 
3787   /* Set status, bad_since_time, etc */
3788   if (UDM_OK != (rc= UdmUpdateUrl(Indexer, Doc, db)))
3789     return rc;
3790 
3791   return rc;
3792 }
3793 
3794 
3795 static udm_rc_t
UdmDeleteWordFromURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3796 UdmDeleteWordFromURL(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc, UDM_DB *db)
3797 {
3798   urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
3799 
3800   if (!UdmVarListFindInt(&Doc->Sections, "PrevStatus", 0))
3801     return UDM_OK;
3802 
3803   UDM_ASSERT(UdmSQLDBModeHandler(db)->DeleteWordsFromURL != NULL);
3804   return UdmSQLDBModeHandler(db)->DeleteWordsFromURL(Indexer, db, url_id);
3805 }
3806 
3807 
3808 static udm_rc_t
UdmDeleteBadHrefs(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db,urlid_t url_id)3809 UdmDeleteBadHrefs(UDM_AGENT *Indexer,
3810                   UDM_DOCUMENT *Doc,
3811                   UDM_DB *db,
3812                   urlid_t url_id)
3813 {
3814   UDM_DOCUMENT  rDoc;
3815   UDM_SQLRES  SQLRes;
3816   char    q[256];
3817   size_t    i;
3818   size_t    nrows;
3819   udm_rc_t rc= UDM_OK;
3820   int    hold_period= UdmVarListFindInt(&Doc->Sections,"HoldBadHrefs",0);
3821   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
3822 
3823   if (hold_period <= 0)
3824     return UDM_OK;
3825 
3826   udm_snprintf(q, sizeof(q), "SELECT rec_id FROM url WHERE status > 300 AND status<>304 AND referrer=%s%i%s AND bad_since_time<%d",
3827     qu, url_id, qu, (int)time(NULL) - hold_period);
3828   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, q)))
3829     return rc;
3830 
3831   nrows = UdmSQLNumRows(&SQLRes);
3832 
3833   UdmDocInit(&rDoc);
3834   for(i = 0; i < nrows ; i++)
3835   {
3836     UdmVarListReplaceStr(&rDoc.Sections,"ID", UdmSQLValue(&SQLRes,i,0));
3837     if(UDM_OK!=(rc=UdmDeleteURL(Indexer, &rDoc, db)))
3838       break;
3839   }
3840   UdmDocFree(&rDoc);
3841   UdmSQLFree(&SQLRes);
3842   return rc;
3843 }
3844 
3845 
3846 static udm_rc_t
UdmDeleteURL(UDM_AGENT * Indexer,UDM_DOCUMENT * Doc,UDM_DB * db)3847 UdmDeleteURL(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc,UDM_DB *db)
3848 {
3849   char  qbuf[128];
3850   udm_rc_t rc;
3851   urlid_t  url_id  =UdmVarListFindInt(&Doc->Sections, "ID", 0);
3852   const char *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
3853 
3854   if(UDM_OK!=(rc=UdmDeleteWordFromURL(Indexer,Doc,db)))return(rc);
3855 
3856   sprintf(qbuf,"DELETE FROM url WHERE rec_id=%s%i%s", qu, url_id, qu);
3857   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3858     return rc;
3859 
3860   sprintf(qbuf,"DELETE FROM urlinfo WHERE url_id=%s%i%s", qu, url_id, qu);
3861   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3862     return rc;
3863 
3864   sprintf(qbuf,"DELETE FROM cachedcopy WHERE url_id=%s%i%s", qu, url_id, qu);
3865   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3866     return rc;
3867 
3868   sprintf(qbuf,"DELETE FROM redirect WHERE url_id=%s%i%s", qu, url_id, qu);
3869   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3870     return rc;
3871 
3872   sprintf(qbuf,"DELETE FROM links WHERE url_id=%s%i%s", qu, url_id, qu);
3873   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, qbuf)))
3874     return rc;
3875 
3876   /* remove all old broken hrefs from this document to avoid broken link collecting */
3877   if (UDM_OK != (rc= UdmDeleteBadHrefs(Indexer, Doc, db, url_id)))
3878     return rc;
3879 
3880   sprintf(qbuf,"UPDATE url SET referrer=%s0%s WHERE referrer=%s%i%s", qu, qu, qu, url_id, qu);
3881   return UdmDBSQLQuery(Indexer, db, NULL, qbuf);
3882 }
3883 
3884 
3885 static udm_rc_t
UdmClearDBUsingIN(UDM_AGENT * Indexer,UDM_DB * db,UDM_URLID_LIST * list)3886 UdmClearDBUsingIN(UDM_AGENT *Indexer, UDM_DB *db, UDM_URLID_LIST *list)
3887 {
3888   UDM_DSTR qbuf, urlin;
3889   udm_rc_t rc= UDM_OK; /* if list if empty */
3890   size_t part;
3891   size_t url_num = UdmVarListFindInt(&Indexer->Conf->Vars, "URLSelectCacheSize", URL_DELETE_CACHE);
3892 
3893   UdmDSTRInit(&qbuf, 4096);
3894   UdmDSTRInit(&urlin, 4096);
3895 
3896   for (part= 0; part < list->nurls; part+= url_num)
3897   {
3898     size_t offs;
3899     urlid_t *item= &list->urls[part];
3900     UdmDSTRReset(&urlin);
3901     for(offs= 0; (offs < url_num) && ((part + offs) < list->nurls); offs++)
3902     {
3903       if(offs) UdmDSTRAppend(&urlin,",", 1);
3904       UdmDSTRAppendf(&urlin, "%d", item[offs]);
3905     }
3906 
3907     if (UDM_OK != (rc= UdmDBSQLBegin(Indexer, db)))
3908       goto ret;
3909 
3910     switch (UdmSQLDBMode(db))
3911     {
3912       case UDM_SQLDBMODE_BLOB:
3913         break;
3914 
3915       case UDM_SQLDBMODE_MULTI:
3916         {
3917           int dictnum;
3918           for (dictnum= 0; dictnum <= MULTI_DICTS; dictnum++)
3919           {
3920             UdmDSTRReset(&qbuf);
3921             UdmDSTRAppendf(&qbuf,"DELETE FROM dict%02X WHERE url_id in (%s)",
3922                            dictnum, UdmDSTRPtr(&urlin));
3923             if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3924               goto ret;
3925           }
3926         }
3927         break;
3928 
3929       case UDM_SQLDBMODE_SINGLE:
3930         UdmDSTRReset(&qbuf);
3931         UdmDSTRAppendf(&qbuf, "DELETE FROM dict WHERE url_id in (%s)",
3932                        UdmDSTRPtr(&urlin));
3933         if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3934           goto ret;
3935         break;
3936       case UDM_SQLDBMODE_RAWBLOB:
3937         udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db), "ClearDB is not supported by this DBMode");
3938         rc= UDM_ERROR;
3939     }
3940 
3941     UdmDSTRReset(&qbuf);
3942     UdmDSTRAppendf(&qbuf, "DELETE FROM url WHERE rec_id in (%s)",UdmDSTRPtr(&urlin));
3943     if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3944       goto ret;
3945 
3946     UdmDSTRReset(&qbuf);
3947     UdmDSTRAppendf(&qbuf,"DELETE FROM urlinfo WHERE url_id in (%s)",UdmDSTRPtr(&urlin));
3948     if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3949       goto ret;
3950 
3951     UdmDSTRReset(&qbuf);
3952     UdmDSTRAppendf(&qbuf,"DELETE FROM cachedcopy WHERE url_id in (%s)",UdmDSTRPtr(&urlin));
3953     if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3954       goto ret;
3955 
3956     UdmDSTRReset(&qbuf);
3957     UdmDSTRAppendf(&qbuf,"DELETE FROM links WHERE url_id in (%s)",UdmDSTRPtr(&urlin));
3958     if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3959       goto ret;
3960 
3961     UdmDSTRReset(&qbuf);
3962     UdmDSTRAppendf(&qbuf,"DELETE FROM redirect WHERE url_id in (%s)",UdmDSTRPtr(&urlin));
3963     if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, UdmDSTRPtr(&qbuf))))
3964       goto ret;
3965 
3966     if (UDM_OK != (rc= UdmDBSQLCommit(Indexer, db)))
3967       goto ret;
3968   }
3969 
3970 ret:
3971   UdmDSTRFree(&qbuf);
3972   UdmDSTRFree(&urlin);
3973   return rc;
3974 }
3975 
3976 
3977 static udm_rc_t
UdmClearDBUsingLoop(UDM_AGENT * Indexer,UDM_DB * db,UDM_URLID_LIST * list)3978 UdmClearDBUsingLoop(UDM_AGENT *Indexer, UDM_DB *db, UDM_URLID_LIST *list)
3979 {
3980   udm_rc_t rc= UDM_OK;
3981   size_t i;
3982   UDM_DOCUMENT Doc;
3983   bzero((void*)&Doc, sizeof(Doc));
3984 
3985   for(i=0; i < list->nurls; i++)
3986   {
3987     UdmVarListReplaceInt(&Doc.Sections, "ID", list->urls[i]);
3988     if(UDM_OK != (rc= UdmDeleteURL(Indexer, &Doc, db)))
3989       break;
3990   }
3991   UdmDocFree(&Doc);
3992   return rc;
3993 }
3994 
3995 
3996 static udm_rc_t
UdmClearDBSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)3997 UdmClearDBSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
3998 {
3999   udm_rc_t rc;
4000   const char *where, *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
4001   char ClearDBHook[128];
4002 
4003   rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where);
4004   udm_snprintf(ClearDBHook, sizeof(ClearDBHook), "%s",
4005                UdmVarListFindStr(&Indexer->Conf->Vars, "SQLClearDBHook", ""));
4006 
4007   if (rc != UDM_OK ||
4008       (ClearDBHook[0] && (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, NULL, ClearDBHook)))))
4009     return rc;
4010 
4011   if(!where[0])
4012   {
4013     return UdmTruncateDB(Indexer, db, Query);
4014   }
4015   else
4016   {
4017     UDM_URLID_LIST urllist;
4018     UDM_DSTR qbuf;
4019     UdmDSTRInit(&qbuf, 4096);
4020 
4021     bzero((void*) &urllist, sizeof(urllist));
4022     UdmDSTRAppendf(&qbuf,"SELECT url.rec_id, url.url FROM url%s WHERE url.rec_id<>%s0%s AND %s",
4023                    Query->from, qu, qu,  where);
4024 
4025     if (UDM_OK != (rc= UdmLoadSlowLimitWithSort(Indexer, db, &urllist, UdmDSTRPtr(&qbuf))))
4026       goto fin;
4027 
4028     rc= UdmSQLDBHaveIn(db) ? UdmClearDBUsingIN(Indexer, db, &urllist) :
4029                        UdmClearDBUsingLoop(Indexer, db, &urllist);
4030 
4031 fin:
4032     UdmFree(urllist.urls);
4033     UdmDSTRFree(&qbuf);
4034   }
4035   return rc;
4036 }
4037 
4038 
4039 /******************** Hrefs ****************************/
4040 
4041 static udm_bool_t
UdmStoreHrefsUseTnx(UDM_AGENT * A)4042 UdmStoreHrefsUseTnx(UDM_AGENT *A)
4043 {
4044   if (A->Conf->DBList.nitems > 1)
4045     return UDM_FALSE;
4046   if (UdmSQLDBType(&A->Conf->DBList.Item[0]) == UDM_DB_PGSQL &&
4047       UdmSQLDBVersion(&A->Conf->DBList.Item[0]) > 90100)
4048     return UDM_TRUE;
4049   return (UdmSQLDBFlags(&A->Conf->DBList.Item[0]) & UDM_SQL_HAVE_GOOD_COMMIT) ?
4050          UDM_TRUE : UDM_FALSE;
4051 }
4052 
4053 
4054 static udm_rc_t
UdmStoreHrefsLock(UDM_AGENT * A,UDM_DB * db)4055 UdmStoreHrefsLock(UDM_AGENT *A, UDM_DB *db)
4056 {
4057   if (UdmSQLDBType(db) == UDM_DB_MYSQL /*&&
4058       !UdmVarListFind(&Doc->Sections, "SQLExportHref")*/)
4059     return UdmDBSQLQuery(A, db, NULL, "LOCK TABLE url WRITE");
4060   return UDM_OK;
4061 }
4062 
4063 
4064 static udm_rc_t
UdmStoreHrefsUnlock(UDM_AGENT * A,UDM_DB * db)4065 UdmStoreHrefsUnlock(UDM_AGENT *A, UDM_DB *db)
4066 {
4067   if (UdmSQLDBType(db) == UDM_DB_MYSQL)
4068     return UdmDBSQLQuery(A, db, NULL, "UNLOCK TABLES");
4069   return UDM_OK;
4070 }
4071 
4072 
4073 static udm_rc_t
UdmHrefStartBulk(UDM_AGENT * A,UDM_DB * db)4074 UdmHrefStartBulk(UDM_AGENT *A, UDM_DB *db)
4075 {
4076   return UdmStoreHrefsUseTnx(A) ? UdmDBSQLBegin(A, db) :
4077                                   UdmStoreHrefsLock(A, db);
4078 }
4079 
4080 
4081 static udm_rc_t
UdmHrefStopBulk(UDM_AGENT * A,UDM_DB * db)4082 UdmHrefStopBulk(UDM_AGENT *A, UDM_DB *db)
4083 {
4084   return UdmStoreHrefsUseTnx(A) ? UdmDBSQLCommit(A, db) :
4085                                   UdmStoreHrefsUnlock(A, db);
4086 }
4087 
4088 
4089 static void
UdmHrefToVarList(UDM_VARLIST * Vars,const UDM_HREF * H)4090 UdmHrefToVarList(UDM_VARLIST *Vars, const UDM_HREF *H)
4091 {
4092   UdmVarListAddLst(Vars, &H->HrefVars, NULL, "*");
4093   UdmVarListReplaceInt(Vars, "Referrer-ID", H->Param.referrer);
4094   UdmVarListReplaceUnsigned(Vars,"Hops", H->Param.hops);
4095   UdmVarListReplaceStr(Vars,"URL",H->url?H->url:"");
4096   UdmVarListReplaceInt(Vars, "URL_ID", UdmStrHash32(H->url ? H->url : ""));
4097   UdmVarListReplaceInt(Vars,"Server_id", H->Param.server_id);
4098   UdmVarListReplaceInt(Vars, "HTDB_URL_ID", H->Param.rec_id);
4099 }
4100 
4101 
4102 static udm_rc_t
UdmHrefAdd(UDM_AGENT * A,UDM_DB * db,UDM_HREF * H)4103 UdmHrefAdd(UDM_AGENT *A, UDM_DB *db, UDM_HREF *H)
4104 {
4105   udm_rc_t rc;
4106   UDM_DOCUMENT Doc;
4107   UdmDocInit(&Doc);
4108   UdmHrefToVarList(&Doc.Sections, H);
4109   rc= UdmAddURL(A, &Doc, db);
4110   UdmDocFree(&Doc);
4111   UdmSQLDB(db)->last_notargets_time= 0;
4112   return rc;
4113 }
4114 
4115 
4116 static udm_rc_t
UdmHrefExport(UDM_AGENT * A,UDM_DB * db,UDM_HREF * H)4117 UdmHrefExport(UDM_AGENT *A, UDM_DB *db, UDM_HREF *H)
4118 {
4119   udm_rc_t rc;
4120   UDM_DOCUMENT Doc;
4121   const char *sql_export= UdmVarListFindStr(&H->HrefVars, "SQLExportHref", NULL);
4122   if (!sql_export)
4123     return UDM_OK;
4124   UdmDocInit(&Doc);
4125   UdmHrefToVarList(&Doc.Sections, H);
4126   rc= UdmExportURL(A, db, &Doc, sql_export);
4127   UdmDocFree(&Doc);
4128   UdmSQLDB(db)->last_notargets_time= 0;
4129   return rc;
4130 }
4131 
4132 
4133 static udm_rc_t
UdmHrefActionSQL(UDM_AGENT * A,UDM_DB * db,UDM_HREF * Href,udm_hrefcmd_t cmd)4134 UdmHrefActionSQL(UDM_AGENT *A, UDM_DB *db, UDM_HREF *Href, udm_hrefcmd_t cmd)
4135 {
4136   UDM_LOCK_CHECK_OWNER(A, UDM_LOCK_DB);
4137   switch (cmd)
4138   {
4139     case UDM_HREFCMD_START_BULK: return UdmHrefStartBulk(A, db);
4140     case UDM_HREFCMD_STOP_BULK:  return UdmHrefStopBulk(A, db);
4141     case UDM_HREFCMD_ADD:        return UdmHrefAdd(A, db, Href);
4142     case UDM_HREFCMD_EXPORT:     return UdmHrefExport(A, db, Href);
4143   }
4144   return UDM_ERROR;
4145 }
4146 
4147 
4148 /******************* WWList *************************/
4149 static udm_rc_t
UdmWWListExtend(UDM_AGENT * A,UDM_DB * db,UDM_WIDEWORDLIST * result,const UDM_WIDEWORD * uw,const char * sql)4150 UdmWWListExtend(UDM_AGENT *A, UDM_DB *db,
4151                 UDM_WIDEWORDLIST *result,
4152                 const UDM_WIDEWORD *uw,
4153                 const char *sql)
4154 {
4155   char *word= uw->Word.str;
4156   char buf[1024];
4157   size_t i, nrows;
4158   UDM_SQLRES SQLRes;
4159   UDM_WIDEWORD_PARAM Param;
4160   UdmBuildParamStr(buf, sizeof(buf), sql, &word, 1);
4161   if (UDM_OK != UdmDBSQLQuery(A, db, &SQLRes, buf))
4162     return UDM_ERROR;
4163   nrows= UdmSQLNumRows(&SQLRes);
4164   UdmWideWordParamInit(&Param);
4165   UdmWideWordParamCopySynonym(&Param, &uw->Param);
4166   for (i= 0; i < nrows; i++)
4167   {
4168     UDM_CONST_STR cstr;
4169     cstr.length= UdmSQLLen(&SQLRes, i, 0);
4170     cstr.str= UdmSQLValue(&SQLRes, i, 0);
4171     UdmWideWordListAddLikeConstStr(result, &Param, cstr);
4172   }
4173   UdmSQLFree(&SQLRes);
4174   return UDM_OK;
4175 }
4176 
4177 
4178 static udm_rc_t
UdmQueryWordForms(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)4179 UdmQueryWordForms(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
4180 {
4181   UDM_WIDEWORDLIST Tmp;
4182   udm_rc_t rc= UDM_OK;
4183   size_t i;
4184   const char *sql= UdmVarListFindStr(&A->Conf->Vars, "SQLWordForms", NULL);
4185   if (!sql)
4186     return UDM_OK;
4187   UdmWideWordListInit(&Tmp);
4188   for (i= 0; i < Query->Res.WWList.nwords; i++)
4189   {
4190     if (UDM_OK != (rc= UdmWWListExtend(A, db, &Tmp, &Query->Res.WWList.Word[i], sql)))
4191       break;
4192   }
4193   for (i= 0; i < Tmp.nwords; i++)
4194   {
4195     /* TODO34: change UdmWideWordListAddXXX() to return udm_rc_t */
4196     UdmWideWordListAdd(&Query->Res.WWList, &Tmp.Word[i]);
4197   }
4198   UdmWideWordListFree(&Tmp);
4199   return rc;
4200 }
4201 
4202 
4203 /******************* Search *************************/
4204 static int
cmp_score_urlid(UDM_URL_SCORE * s1,UDM_URL_SCORE * s2)4205 cmp_score_urlid(UDM_URL_SCORE *s1, UDM_URL_SCORE *s2)
4206 {
4207   if (s1->url_id > s2->url_id) return(1);
4208   if (s1->url_id < s2->url_id) return(-1);
4209   return 0;
4210 }
4211 
4212 
4213 static void
UdmScoreListToURLData(UDM_URLDATA * D,UDM_URL_SCORE * C,size_t num)4214 UdmScoreListToURLData(UDM_URLDATA *D, UDM_URL_SCORE *C, size_t num)
4215 {
4216   for ( ; num > 0; num--, D++, C++)
4217   {
4218     D->url_id= C->url_id;
4219     D->score= C->score;
4220   }
4221 }
4222 
4223 
#ifdef HAVE_DEBUG
/*
  Debug helper: print all items of a score list to stderr,
  one "url_id:score" pair per line.
*/
static void
UdmURLScoreListPrint(UDM_URLSCORELIST *List)
{
  size_t n= 0;

  while (n < List->nitems)
  {
    fprintf(stderr, "%d:%d\n", List->Item[n].url_id, List->Item[n].score);
    n++;
  }
}
#endif
4236 
4237 
4238 static void
UdmDebugScoreAppendScoreAndRank(UDM_VARLIST * Vars,const UDM_QUERY_PARAM * query_param,const UDM_URLDATALIST * DataList)4239 UdmDebugScoreAppendScoreAndRank(UDM_VARLIST *Vars,
4240                                 const UDM_QUERY_PARAM *query_param,
4241                                 const UDM_URLDATALIST *DataList)
4242 {
4243   UDM_VAR *var;
4244   if (query_param->DebugURLId &&
4245       (var= UdmVarListFindVar(Vars, "DebugScore")))
4246   {
4247     size_t i;
4248     for (i= 0; i < DataList->nitems; i++)
4249     {
4250       if (DataList->Item[i].url_id == query_param->DebugURLId)
4251       {
4252         char tmp[256];
4253         size_t length= udm_snprintf(tmp, sizeof(tmp), " rank=%d", (int) i + 1);
4254         UdmVarAppendStrn(var, tmp, length);
4255         break;
4256       }
4257     }
4258   }
4259 }
4260 
4261 
4262 static udm_rc_t
UdmSortAndGroupByURL(UDM_AGENT * A,UDM_QUERY * Query,UDM_SEARCHSECTIONLIST * SectionList,UDM_DB * db)4263 UdmSortAndGroupByURL(UDM_AGENT *A,
4264                      UDM_QUERY *Query,
4265                      UDM_SEARCHSECTIONLIST *SectionList,
4266                      UDM_DB *db)
4267 {
4268   UDM_QUERY_PARAM query_param;
4269   UDM_URLSCORELIST ScoreList;
4270   UDM_URLDATALIST DataList;
4271   udm_timer_t ticks=UdmStartTimer();
4272   const char *pattern= UdmVarListFindStr(&A->Conf->Vars, "s", "R");
4273   size_t nbytes;
4274   int flags= 0, flags2= 0;
4275   udm_rc_t rc= UDM_OK;
4276   const char *p;
4277   const char *su= UdmVarListFindStr(&A->Conf->Vars, "su", NULL);
4278   int group_by_site= UdmVarListFindBool(&A->Conf->Vars, "GroupBySite", UDM_FALSE)
4279                      && UdmVarListFindStr(&A->Conf->Vars, "site", "")[0] == '\0' ?
4280                      UDM_URLDATA_SITE : 0;
4281   int group_by_site_rank= !strcmp(UdmVarListFindStr(&A->Conf->Vars, "GroupBySite", "no"), "rank");
4282   size_t BdictThreshold= (size_t) UdmVarListFindInt(&A->Conf->Vars,
4283                                                     "URLDataThreshold", 0);
4284   size_t MaxResults= (size_t) UdmVarListFindInt(UdmSQLDBVars(db), "MaxResults", 0);
4285   udm_bool_t use_qcache= UdmVarListFindBool(UdmSQLDBVars(db), "qcache", UDM_FALSE);
4286   size_t num_best_rows= Query->num_best_rows;
4287 
4288   UdmQueryParamInit(&query_param, A->Conf, UdmSQLDBVars(db));
4289 
4290   flags|= group_by_site ? UDM_URLDATA_SITE : 0;
4291   flags|= group_by_site_rank ? UDM_URLDATA_SITE_RANK : 0;
4292   flags|= query_param.DateFactor ? UDM_URLDATA_LM : 0;
4293   flags2|= query_param.PopularityFactor > 0 ? UDM_URLDATA_POP : 0;
4294 
4295   for (p = pattern; *p; p++)
4296   {
4297     if (*p == 'U' || *p == 'u') flags|= UDM_URLDATA_URL;
4298     if (*p == 'D' || *p == 'd') flags|= UDM_URLDATA_LM;
4299     if (*p == 'S' || *p == 's') flags|= (su && su[0]) ? UDM_URLDATA_SU : 0;
4300     if (*p == 'P' || *p == 'p') flags2|= UDM_URLDATA_POP;
4301   }
4302 
4303   ticks=UdmStartTimer();
4304   bzero((void*) &ScoreList, sizeof(ScoreList));
4305   UdmURLDataListInit(&DataList);
4306 
4307   UdmLog(A,UDM_LOG_DEBUG, "Start GroupByURL %d sections", (int) SectionList->nsections);
4308   UdmGroupByURL2(A, db, Query, &query_param, SectionList, &ScoreList);
4309 
4310   UdmLog(A, UDM_LOG_DEBUG, "%-30s%.2f (%d docs found)",
4311          "Stop  GroupByURL", UdmStopTimer(&ticks), (int) ScoreList.nitems);
4312 
4313 #ifdef HAVE_DEBUG
4314   if (UdmVarListFindBool(&A->Conf->Vars, "DebugGroupByURL", UDM_FALSE))
4315   {
4316     UdmURLScoreListPrint(&ScoreList);
4317   }
4318 #endif
4319 
4320   UdmApplyCachedQueryLimit(A, &ScoreList, db);
4321   if (ScoreList.nitems == 0)
4322     goto ex;
4323 
4324   if (UDM_OK != (rc=  UdmUserScoreListLoadAndApplyToURLScoreList(A, &ScoreList,
4325                                                                  db,
4326                                                                  &query_param)))
4327     goto ex;
4328 
4329   UdmLog(A,UDM_LOG_DEBUG,"Start load url data %d docs (%d best needed)",
4330          (int) ScoreList.nitems, (int) Query->num_best_rows);
4331   ticks=UdmStartTimer();
4332 
4333   nbytes= UdmHashSize(ScoreList.nitems) * sizeof(UDM_URLDATA);
4334   DataList.Item = (UDM_URLDATA*)UdmMalloc(nbytes);
4335   bzero((void*) DataList.Item, nbytes);
4336   DataList.nitems= ScoreList.nitems;
4337 
4338   /* Use full sort in case if DebugURLId is specified */
4339   if (query_param.DebugURLId)
4340     num_best_rows= ScoreList.nitems;
4341 
4342   if (num_best_rows > ScoreList.nitems)
4343     num_best_rows= ScoreList.nitems;
4344 
4345   /* Try fast sorting if sorting is on score */
4346   if (num_best_rows < 256 && !flags && !flags2 && !use_qcache)
4347   {
4348     udm_timer_t ticks1;
4349 
4350     Query->stats.total_found= ScoreList.nitems;
4351     UdmLog(A, UDM_LOG_DEBUG, "Start SortByScore %d docs", (int) ScoreList.nitems);
4352     ticks1=UdmStartTimer();
4353     if (ScoreList.nitems > 1000)
4354     {
4355       UdmURLScoreListSortByScoreThenURLTop(&ScoreList, 1000);
4356     }
4357     else
4358     {
4359       UdmURLScoreListSortByScoreThenURL(&ScoreList);
4360     }
4361     UdmSort((void*) ScoreList.Item, num_best_rows,
4362             sizeof(UDM_URL_SCORE), (udm_qsort_cmp) cmp_score_urlid);
4363     UdmScoreListToURLData(DataList.Item, ScoreList.Item, num_best_rows);
4364     UdmLog(A,UDM_LOG_DEBUG,"%-30s%.2f", "Stop  SortByScore:", UdmStopTimer(&ticks1));
4365     DataList.nitems= num_best_rows; /* Put only num_best_rows into DataList */
4366     goto date_factor;
4367   }
4368 
4369   UdmScoreListToURLData(DataList.Item, ScoreList.Item, DataList.nitems);
4370 
4371   /* Sort by a user defined section, if given */
4372   if (flags & UDM_URLDATA_SU)
4373   {
4374     size_t norder;
4375     udm_timer_t ticks1= UdmStartTimer();
4376     UdmLog(A, UDM_LOG_DEBUG, "Trying to load fast section order '%s'", su);
4377     rc= UdmFastOrderLoadAndApplyToURLDataList(A, db, &DataList, su, &norder);
4378     UdmLog(A, UDM_LOG_DEBUG, "Loading fast order '%s' done, %d docs found, %.2f sec",
4379            su, (int) norder, UdmStopTimer(&ticks1));
4380     if (norder)
4381       flags^= UDM_URLDATA_SU;
4382   }
4383 
4384   if (flags)
4385   {
4386     if (!(UdmSQLDBMode(db) == UDM_SQLDBMODE_BLOB &&
4387          !(flags & UDM_URLDATA_URL)    &&
4388          !(flags & UDM_URLDATA_SU)     &&
4389            BdictThreshold < ScoreList.nitems) ||
4390         (UDM_NOTARGET == UdmLoadURLDataFromBdict(A, db, &DataList, flags)))
4391       rc= UdmLoadURLDataFromURL(A, db, &DataList, flags);
4392   }
4393 
4394   /* Do this before GroupBySite, while DataList is sorted by url_id */
4395   UdmLoadURLDataFromBdict(A, db, &DataList, flags2);
4396 
4397   if (group_by_site)
4398     UdmURLDataListGroupBySiteUsingSort(A, &DataList, db);
4399 
4400   if (UDM_OK != UdmUserSiteScoreListLoadAndApplyToURLDataList(A, &DataList, db,
4401                                                               &query_param))
4402   {
4403     rc= UDM_ERROR;
4404     goto ex;
4405   }
4406 
4407   if (flags & UDM_URLDATA_SITE_RANK)
4408   {
4409     udm_timer_t ticks1= UdmStartTimer();
4410     UdmLog(A, UDM_LOG_DEBUG, "Start applying in-site-rank");
4411     UdmURLDataSortBySite(&DataList);
4412     UdmURLDataApplySiteRank(A, &DataList, 0);
4413     UdmLog(A, UDM_LOG_DEBUG, "Stop applying in-site-rank:   %.2f sec", UdmStopTimer(&ticks1));
4414   }
4415 
4416   Query->stats.total_found= DataList.nitems;
4417 
4418 date_factor:
4419 
4420   if (rc != UDM_OK)
4421     goto ex;
4422 
4423   /* TODO: check whether limit by site works fine */
4424   if (!query_param.RelevancyFactor || query_param.DateFactor)
4425     UdmURLDataListApplyRelevancyFactors(A, &DataList, &query_param);
4426 
4427   if (query_param.PopularityFactor > 0)
4428     UdmURLDataListApplyPopularity(A, &DataList, &query_param);
4429 
4430   UdmLog(A,UDM_LOG_DEBUG,"%-30s%.2f", "Stop  load url data:", UdmStopTimer(&ticks));
4431 
4432   UdmLog(A, UDM_LOG_DEBUG, "Start SortByPattern %d docs", (int) DataList.nitems);
4433   ticks=UdmStartTimer();
4434   if (DataList.nitems)
4435     UdmURLDataSortByPattern(&DataList, pattern);
4436   UdmLog(A,UDM_LOG_DEBUG,"%-30s%.2f", "Stop  SortByPattern:", UdmStopTimer(&ticks));
4437 
4438   Query->URLData= DataList;
4439   bzero((void *) &DataList, sizeof(DataList));
4440   UdmDebugScoreAppendScoreAndRank(&A->Conf->Vars, &query_param, &Query->URLData);
4441 
4442   if (MaxResults && MaxResults < Query->stats.total_found)
4443   {
4444     UdmLog(A, UDM_LOG_DEBUG, "Applying MaxResults=%d, total_found=%d\n",
4445            (int) MaxResults, (int) Query->stats.total_found);
4446     Query->stats.total_found= MaxResults;
4447     if (Query->URLData.nitems > MaxResults)
4448     {
4449       /* Free the part of URLData that will not be unused */
4450       UdmURLDataListFreeItems(&Query->URLData, MaxResults, Query->URLData.nitems);
4451       Query->URLData.nitems= MaxResults;
4452     }
4453   }
4454 
4455 ex:
4456   UdmURLDataListFree(&DataList);
4457   UdmFree(ScoreList.Item);
4458   return rc;
4459 }
4460 
4461 
4462 static udm_rc_t /* WHERE limit */
LoadURL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,const char * where,UDM_URLID_LIST * buf)4463 LoadURL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
4464         const char *where, UDM_URLID_LIST *buf)
4465 {
4466   udm_rc_t rc;
4467   UDM_SQLRES SQLRes;
4468   char qbuf[1024 * 4];
4469   size_t nrows;
4470   urlid_t *tmp;
4471   size_t i;
4472 
4473   if (!*where)
4474     return UDM_OK;
4475 
4476   /* TODO: reuse LoadSlowLimitWithSort() here */
4477   udm_snprintf(qbuf, sizeof(qbuf),
4478                "SELECT url.rec_id FROM url%s WHERE %s",
4479                Query->from, where);
4480   if  (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, qbuf)))
4481     return rc;
4482 
4483   if (!(nrows= UdmSQLNumRows(&SQLRes)))
4484   {
4485     buf->empty= 1;
4486     UdmSQLFree(&SQLRes);
4487     return(UDM_OK);
4488   }
4489 
4490   tmp= (urlid_t*) UdmMalloc(sizeof(urlid_t) * nrows);
4491   buf->urls= (urlid_t*) UdmMalloc(sizeof(urlid_t) * nrows);
4492   if (!tmp || !buf->urls)
4493   {
4494     UDM_FREE(buf->urls);
4495     UDM_FREE(tmp);
4496     goto ex;
4497   }
4498 
4499   for (i= 0; i < nrows; i++)
4500   {
4501     tmp[i]= (urlid_t) UDM_ATOI(UdmSQLValue(&SQLRes, i, 0));
4502   }
4503   UdmSort(tmp, nrows, sizeof(urlid_t), (udm_qsort_cmp)UdmCmpURLID);
4504 
4505   /* Remove duplicates */
4506   for (i= 0; i < nrows; )
4507   {
4508     while (++i < nrows && tmp[i] == tmp[i - 1]);
4509     buf->urls[buf->nurls++] = tmp[i - 1];
4510   }
4511   UDM_FREE(tmp);
4512   if ((tmp= (urlid_t*) UdmRealloc(buf->urls, sizeof(urlid_t) * buf->nurls)))
4513     buf->urls = tmp;
4514 
4515 ex:
4516   UdmSQLFree(&SQLRes);
4517   return UDM_OK;
4518 }
4519 
4520 
4521 /*
4522   MySQL: no cast needed
4523   - SQLite: CAST(word AS INTEGER)
4524   - PostgreSQL: CASE WHEN a~'^[0-9]*$' THEN a::integer ELSE 0 END
4525   - MSSQL:    CAST(word AS INTEGER)
4526     There is a function ISNUMERIC(). However, it returns true
4527     for things like "0x0123d", Cast does not work for this.
4528 
4529   - Sybase:   returns error when input is non-numeric
4530   - Oracle:   returns error ...
4531   - IBM DB2:  returns error ...
4532   - Firebird: return error ...
4533   - Mimer:    return error ...
4534 */
4535 static void
UdmBuildNumericOperatorCondition(UDM_DB * db,char * cmparg,size_t maxlen,const char * op,int number)4536 UdmBuildNumericOperatorCondition(UDM_DB *db, char *cmparg, size_t maxlen,
4537                                  const char *op, int number)
4538 {
4539   switch (UdmSQLDBType(db))
4540   {
4541     case UDM_DB_MYSQL:
4542       udm_snprintf(cmparg, maxlen, "word%s%d", op, number);
4543       break;
4544     case UDM_DB_PGSQL:
4545       udm_snprintf(cmparg, maxlen, "(word~'^[0-9]*$' AND word::integer%s%d)", op, number);
4546       break;
4547     default:
4548       udm_snprintf(cmparg, maxlen, "(word>='0' AND word <='99999999999' AND CAST(word AS INTEGER)%s%d)", op, number);
4549   }
4550 }
4551 
4552 
4553 static udm_rc_t
UdmBuildCmpArgSQL(UDM_AGENT * A,UDM_DB * db,udm_match_mode_t match_mode,const char * word,char * cmparg,size_t maxlen)4554 UdmBuildCmpArgSQL(UDM_AGENT *A, UDM_DB *db, udm_match_mode_t match_mode,
4555                   const char *word, char *cmparg, size_t maxlen)
4556 {
4557   const char *left= "", *right= "";
4558   size_t length= strlen(word);
4559   char escwrd[1000];
4560 
4561   if (match_mode == UDM_MATCH_RANGE)
4562   {
4563     UDM_ASSERT(length > 6);
4564     if (*word == '[')
4565       left= ">=";
4566     else if (*word == '{')
4567       left= ">";
4568     if (word[length - 1] == ']')
4569       right= "<=";
4570     else if (word[length - 1] == '}')
4571       right= "<";
4572     word++;
4573     length-= 2;
4574   }
4575 
4576   UdmDBSQLEscStr(A, db, escwrd, word, length); /* Search word */
4577   switch (match_mode)
4578   {
4579     case UDM_MATCH_BEGIN:
4580       udm_snprintf(cmparg, maxlen, "word LIKE '%s%%'", escwrd);
4581       break;
4582     case UDM_MATCH_END:
4583       udm_snprintf(cmparg, maxlen, "word LIKE '%%%s'", escwrd);
4584       break;
4585     case UDM_MATCH_SUBSTR:
4586       udm_snprintf(cmparg, maxlen, "word LIKE '%%%s%%'", escwrd);
4587       break;
4588     case UDM_MATCH_NUMERIC_LT:
4589       UdmBuildNumericOperatorCondition(db, cmparg, maxlen, "<", atoi(escwrd));
4590       break;
4591     case UDM_MATCH_NUMERIC_GT:
4592       UdmBuildNumericOperatorCondition(db, cmparg, maxlen, ">", atoi(escwrd));
4593       break;
4594     case UDM_MATCH_RANGE:
4595       {
4596         char *first= escwrd;
4597         char *second= strstr(first, " TO ");
4598         if (!second)
4599         {
4600           udm_snprintf(cmparg, maxlen, "word='<ERROR>'");
4601           return UDM_ERROR;
4602         }
4603         *second= '\0';
4604         second+= 4;
4605         udm_snprintf(cmparg, maxlen, "word%s'%s' AND word%s'%s'",
4606                      left, first, right, second);
4607       }
4608       break;
4609     case UDM_MATCH_FULL:
4610     default:
4611       udm_snprintf(cmparg, maxlen, "word='%s'", escwrd);
4612       break;
4613   }
4614   return(UDM_OK);
4615 }
4616 
4617 
4618 static udm_rc_t
UdmFindOneWordSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args)4619 UdmFindOneWordSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
4620                   UDM_FINDWORD_ARGS *args)
4621 {
4622   char cmparg[256];
4623   UdmBuildCmpArgSQL(A, db,
4624                     args->Word.Param.match_mode, args->Word.Word.str,
4625                     cmparg, sizeof(cmparg));
4626   args->cmparg= cmparg;
4627 
4628   UDM_ASSERT(UdmSQLDBModeHandler(db)->FindWord != NULL);
4629   return UdmSQLDBModeHandler(db)->FindWord(A, db, Query, args);
4630 }
4631 
4632 
4633 static udm_rc_t
UdmFindMultiWordSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args)4634 UdmFindMultiWordSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
4635                     UDM_FINDWORD_ARGS *args)
4636 {
4637   char *lt, *tmp_word, *tok;
4638   udm_rc_t rc= UDM_OK;
4639   UDM_SEARCHSECTIONLISTLIST OriginalSearchSectionListList;
4640   size_t orig_wordnum;
4641   size_t nparts= 0;
4642   const char *w;
4643   char *orig_word= args->Word.Word.str;
4644   char delim[]= " \r\t_-./";
4645 
4646   /* Check if the word really multi-part */
4647   for (w= args->Word.Word.str; ; w++)
4648   {
4649     if (!*w)
4650       return UdmFindOneWordSQL(A, db, Query, args); /* No delimiters found */
4651 
4652     if (strchr(delim, *w)) /* Delimiter found */
4653       break;
4654   }
4655 
4656   if (!(tmp_word= UdmStrdup(args->Word.Word.str)))
4657     return(UDM_ERROR);
4658 
4659   UdmLog(A, UDM_LOG_DEBUG,
4660          "Start searching for multiword '%s'", args->Word.Word.str);
4661   OriginalSearchSectionListList= args->SearchSectionListList;
4662   UdmSearchSectionListListInit(&args->SearchSectionListList);
4663   orig_wordnum= args->Word.Param.order;
4664   args->need_coords= 1; /* Force immediate coord unpacking */
4665 
4666   for (tok= udm_strtok_r(tmp_word, delim, &lt) ; tok ;
4667        tok= udm_strtok_r(NULL, delim, &lt))
4668   {
4669     udm_timer_t ticks1= UdmStartTimer();
4670     args->Word.Word.str= tok;
4671     UdmLog(A, UDM_LOG_DEBUG,
4672            "Searching for subword '%s'", args->Word.Word.str);
4673     rc= UdmFindOneWordSQL(A, db, Query, args);
4674     UdmLog(A, UDM_LOG_DEBUG,
4675            "Stop searching for subword '%s' %d coords found: %.2f",
4676            args->Word.Word.str, (int) args->Word.Param.count, UdmStopTimer(&ticks1));
4677     /* If the next word wasn't found - no need to search for others. */
4678     if (rc != UDM_OK || !args->Word.Param.count)
4679       goto ret;
4680     nparts++;
4681     args->Word.Param.order++;
4682   }
4683 
4684   /* All parts returned results. Check phrase */
4685   UdmMultiWordAdd(&args->SearchSectionListList, &Query->Res.WWList, &args->Word,
4686                   &args->urls, &OriginalSearchSectionListList,
4687                   orig_wordnum, nparts);
4688 
4689 
4690 ret:
4691   UdmFree(tmp_word);
4692   UdmSearchSectionListListFree(&args->SearchSectionListList);
4693   args->SearchSectionListList= OriginalSearchSectionListList;
4694   args->Word.Word.str= orig_word;
4695   args->need_coords= 0;
4696   UdmLog(A, UDM_LOG_DEBUG,
4697          "Stop searching for multiword '%s'", args->Word.Word.str);
4698   return rc;
4699 }
4700 
4701 
4702 static udm_rc_t
UdmFindAlwaysFoundWordSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args)4703 UdmFindAlwaysFoundWordSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query,
4704                           UDM_FINDWORD_ARGS *args)
4705 {
4706   udm_rc_t rc= UDM_OK;
4707   UDM_SQLRES SQLRes;
4708   char qbuf[1024 * 4];
4709   size_t nrows;
4710   size_t i;
4711   UDM_URLCRDLIST CoordList;
4712   UDM_URL_CRD Coord;
4713 
4714   bzero((void*) &CoordList, sizeof(CoordList));
4715   bzero((void*) &Coord, sizeof(Coord));
4716   Coord.urlid_coord.coord.pos= 0x00010100; /* TODO34: why? */
4717   Coord.num= args->Word.Param.order;
4718 
4719   if (*args->where)
4720      udm_snprintf(qbuf, sizeof(qbuf), "SELECT url.rec_id FROM url%s WHERE %s",
4721                   Query->from, args->where);
4722   else
4723   {
4724     if (args->urls.nurls)
4725     {
4726       /*
4727         A fast limit is loaded.
4728         No needs to do "SELECT FROM url".
4729         Populate CoordList from the fast limit instead.
4730       */
4731       for (i= 0; i < args->urls.nurls; i++)
4732       {
4733         Coord.urlid_coord.url_id= args->urls.urls[i];
4734         if (UDM_OK != (rc= UdmAddOneCoord(&CoordList, &Coord)))
4735           return UDM_ERROR;
4736       }
4737       UdmURLCRDListListAddWithSort2(&args->SearchSectionListList,
4738                                     &Query->Res.WWList, &args->Word, &CoordList);
4739       return UDM_OK;
4740     }
4741     udm_snprintf(qbuf, sizeof(qbuf), "SELECT url.rec_id FROM url");
4742   }
4743 
4744   if ((rc= UdmDBSQLQuery(A, db, &SQLRes, qbuf)) != UDM_OK)
4745     return(rc);
4746   /* Note that rc is implicitly set to UDM_OK at this point. */
4747   if (! (nrows= UdmSQLNumRows(&SQLRes)))
4748     goto err;
4749 
4750   for (i = 0; i < nrows; i++)
4751   {
4752     Coord.urlid_coord.url_id= (urlid_t) UDM_ATOI(UdmSQLValue(&SQLRes, i, 0));
4753     if (UDM_OK != (rc= UdmAddOneCoord(&CoordList, &Coord)))
4754       break;
4755   }
4756 
4757   if (args->urls.nurls)
4758     UdmApplyFastLimit(&CoordList, &args->urls);
4759   if (CoordList.ncoords)
4760     UdmURLCRDListListAddWithSort2(&args->SearchSectionListList,
4761                                   &Query->Res.WWList, &args->Word, &CoordList);
4762 
4763 err:
4764   UdmSQLFree(&SQLRes);
4765   return(rc);
4766 }
4767 
4768 
4769 static udm_rc_t
UdmCheckIndex(UDM_AGENT * A,UDM_DB * db)4770 UdmCheckIndex(UDM_AGENT *A, UDM_DB *db)
4771 {
4772   int tm;
4773   udm_rc_t rc;
4774   if (UDM_OK != (rc= UdmBlobReadTimestamp(A, db, &tm, 0)))
4775     return rc;
4776   if (tm)
4777     return UDM_OK;
4778 #ifdef WIN32
4779   sprintf(A->Conf->errstr, "Inverted word index not found. Probably you forgot to run 'Create fast index'.");
4780 #else
4781   sprintf(A->Conf->errstr, "Inverted word index not found. Probably you forgot to run 'indexer --index'.");
4782 #endif
4783   return UDM_ERROR;
4784 }
4785 
4786 
4787 static udm_rc_t
UdmMergeWords(UDM_AGENT * A,UDM_DB * db,UDM_FINDWORD_ARGS * args,UDM_SEARCHSECTIONLIST * SectionList)4788 UdmMergeWords(UDM_AGENT *A, UDM_DB *db,
4789               UDM_FINDWORD_ARGS *args, UDM_SEARCHSECTIONLIST *SectionList)
4790 {
4791   udm_timer_t ticks= UdmStartTimer();
4792 
4793   UdmLog(A, UDM_LOG_DEBUG,
4794          "Start merging %d lists", (int) args->SearchSectionListList.nitems);
4795   UdmSearchSectionListListMergeSorted(&args->SearchSectionListList, SectionList, 1);
4796   UdmLog(A, UDM_LOG_DEBUG, "%-30s%.2f (%d sections)",
4797          "Stop  merging:", UdmStopTimer(&ticks), (int) SectionList->nsections);
4798 
4799   if (!SectionList->nsections &&
4800       UdmSQLDBMode(db) == UDM_SQLDBMODE_BLOB &&
4801       !args->live_updates)
4802     return UdmCheckIndex(A, db);
4803   return UDM_OK;
4804 }
4805 
4806 
4807 static udm_rc_t
UdmSearchParamInit(UDM_FINDWORD_ARGS * args,UDM_AGENT * A,UDM_QUERY * Query,UDM_DB * db)4808 UdmSearchParamInit(UDM_FINDWORD_ARGS *args,
4809                    UDM_AGENT *A,
4810                    UDM_QUERY *Query,
4811                    UDM_DB *db)
4812 {
4813   bzero((void*) args, sizeof(*args));
4814   UdmWideWordListInit(&args->CollationMatches);
4815   UdmQueryParamInit(&args->query_param, A->Conf, UdmSQLDBVars(db));
4816   args->Word.Param.match_mode= UdmMatchMode(UdmVarListFindStr(&A->Conf->Vars, "wm", "wrd"));
4817   args->live_updates= UdmVarListFindBool(UdmSQLDBVars(db), "LiveUpdates", UDM_FALSE);
4818   return UdmSQLBuildWhereCondition(A, db, Query, &args->where);
4819 }
4820 
4821 
4822 static void
UdmSearchParamFree(UDM_FINDWORD_ARGS * args)4823 UdmSearchParamFree(UDM_FINDWORD_ARGS *args)
4824 {
4825   UDM_FREE(args->urls.urls);
4826   UDM_FREE(args->live_update_active_urls.urls);
4827   UDM_FREE(args->live_update_deleted_urls.urls);
4828   UdmWideWordListFree(&args->CollationMatches);
4829   UdmSearchSectionListListFree(&args->SearchSectionListList);
4830   UdmSQLResListFree(&args->SQLResults);
4831   UdmInvertedIndexCacheFree(&args->IndexCache);
4832 }
4833 
4834 
4835 static udm_rc_t
UdmFindLoadSlowOrFastLimit(UDM_AGENT * A,UDM_DB * db,UDM_URLID_LIST * list,const char * fl,int count)4836 UdmFindLoadSlowOrFastLimit(UDM_AGENT *A,
4837                            UDM_DB *db,
4838                            UDM_URLID_LIST *list,
4839                            const char *fl,
4840                            int count)
4841 {
4842   udm_rc_t rc;
4843   char name[64];
4844   const char *q;
4845   UDM_URLID_LIST fl_urls;
4846   bzero((void*) &fl_urls, sizeof(fl_urls));
4847   if ((fl_urls.exclude= (fl[0] == '-')))
4848     fl++;
4849   udm_snprintf(name, sizeof(name), "Limit.%s", fl);
4850   if (UDM_OK != (rc= ((q= UdmVarListFindStr(&A->Conf->Vars, name, NULL)) ?
4851                      UdmLoadSlowLimitWithSort(A, db, &fl_urls, q) :
4852                      UdmBlobLoadFastURLLimit(A, db, fl, &fl_urls))))
4853     goto ret;
4854   UdmLog(A,UDM_LOG_DEBUG, "Limit '%s' loaded%s%s %d URLs",
4855          fl, fl_urls.exclude ? " type=excluding" : "",
4856          q ? " source=slow":"", (int) fl_urls.nurls);
4857   if (!count)
4858   {
4859     list->exclude= fl_urls.exclude;
4860     UdmURLIdListUnion(list, &fl_urls);
4861   }
4862   else
4863   {
4864     if (list->exclude == fl_urls.exclude)
4865       UdmURLIdListUnion(list, &fl_urls);
4866     else
4867       UdmURLIdListJoin(list, &fl_urls);
4868   }
4869   UDM_FREE(fl_urls.urls);
4870 ret:
4871   return rc;
4872 }
4873 
4874 
4875 /*
4876   Load WHERE and fl limits from the database at search time.
4877 */
4878 static udm_rc_t
UdmFindLoadLimits(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args,const char * fl)4879 UdmFindLoadLimits(UDM_AGENT *A,
4880                   UDM_DB *db,
4881                   UDM_QUERY *Query,
4882                   UDM_FINDWORD_ARGS *args,
4883                   const char *fl)
4884 {
4885   udm_rc_t rc= UDM_OK;
4886   udm_timer_t ticks= UdmStartTimer();
4887 
4888   UdmLog(A, UDM_LOG_DEBUG, "Start loading limits");
4889   ticks= UdmStartTimer();
4890   if (*args->where)
4891   {
4892     LoadURL(A, db, Query, args->where, &args->urls);
4893     UdmLog(A, UDM_LOG_DEBUG,
4894            "WHERE limit loaded. %d URLs found", (int) args->urls.nurls);
4895   }
4896   if (!args->urls.empty && fl[0])
4897   {
4898     UDM_URLID_LIST lim;
4899     char delim[]= " ,", names[128], *tok, *lt;
4900     int nlimits= 0;
4901     bzero(&lim, sizeof(lim));
4902     udm_snprintf(names, sizeof(names), "%s", fl);
4903     for (tok= udm_strtok_r(names, delim, &lt) ; tok ;
4904          tok= udm_strtok_r(NULL, delim, &lt), nlimits++)
4905     {
4906       if (UDM_OK != (rc= UdmFindLoadSlowOrFastLimit(A, db, &lim,
4907                                                     tok, nlimits)))
4908         goto ret;
4909     }
4910     if (nlimits)
4911       UdmURLIdListMerge(&args->urls, &lim);
4912     UDM_FREE(lim.urls);
4913   }
4914   UdmLog(A, UDM_LOG_DEBUG, "%-30s%.2f (%d URLs found)",
4915          "Stop  loading limits", UdmStopTimer(&ticks), (int) args->urls.nurls);
4916 ret:
4917   return rc;
4918 }
4919 
4920 
4921 /*
4922   Load word information from the database
4923 */
4924 static udm_rc_t
UdmFindWordsFetch(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,UDM_FINDWORD_ARGS * args,const char * always_found_word)4925 UdmFindWordsFetch(UDM_AGENT *A,
4926                   UDM_DB *db,
4927                   UDM_QUERY *Query,
4928                   UDM_FINDWORD_ARGS *args,
4929                   const char *always_found_word)
4930 {
4931   size_t wordnum;
4932   udm_rc_t rc= UDM_OK;
4933   udm_timer_t ticks0= UdmStartTimer();
4934 
4935   UdmLog(A, UDM_LOG_DEBUG, "Start fetching words");
4936 
4937   /* Now find each word */
4938   for(wordnum=0; wordnum < Query->Res.WWList.nwords; wordnum++)
4939   {
4940     udm_timer_t ticks= UdmStartTimer();
4941     UDM_WIDEWORD *W= &Query->Res.WWList.Word[wordnum];
4942     char quoted_word[64];
4943     udm_snprintf(quoted_word, sizeof(quoted_word), "'%s'", W->Word.str);
4944 
4945     if (W->Param.origin == UDM_WORD_ORIGIN_STOP) continue;
4946 
4947     UdmLog(A, UDM_LOG_DEBUG, "Start search for %s", quoted_word);
4948 
4949     args->Word.Param.order= wordnum;
4950     args->Word.Param.count= 0;
4951     args->Word.Word.str= W->Word.str;
4952     args->Word.Param.match_mode= W->Param.match_mode;
4953     args->Word.Param.secno= W->Param.secno;
4954 
4955     /*
4956        For now SYNONYMs only are treated as a possible multi-word
4957        origin. Probably it will be changed in future, so we will
4958        use this feature for phrase search.
4959      */
4960     if (always_found_word && !strcmp(W->Word.str, always_found_word))
4961       rc= UdmFindAlwaysFoundWordSQL(A, db, Query, args);
4962     else if (W->Param.origin == UDM_WORD_ORIGIN_SYNONYM ||
4963              W->Param.phrwidth > 0)
4964       rc= UdmFindMultiWordSQL(A, db, Query, args);
4965     else
4966       rc= UdmFindOneWordSQL(A, db, Query, args);
4967 
4968     if (rc != UDM_OK)
4969       goto ret;
4970 
4971     /*
4972       If CollationMatches is not empty, then we should skip
4973       updating word statistics here - it will be updated in
4974       the loop after UdmSortAndGroupByURL().
4975      */
4976     if (!args->CollationMatches.nwords)
4977       Query->Res.WWList.Word[wordnum].Param.count+= args->Word.Param.count;
4978 
4979     UdmLog(A, UDM_LOG_DEBUG,
4980            "Stop  search for %-13s%.2f (%u coords found)",
4981            quoted_word, UdmStopTimer(&ticks), (int) args->Word.Param.count);
4982   }
4983   UdmLog(A, UDM_LOG_DEBUG,
4984          "%-30s%.2f", "Stop  fetching words:", UdmStopTimer(&ticks0));
4985 ret:
4986   return rc;
4987 }
4988 
4989 
4990 static udm_rc_t
UdmFindWordsSQLNoCached(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)4991 UdmFindWordsSQLNoCached(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
4992 {
4993   const char  *always_found_word, *fl;
4994   udm_rc_t rc= UDM_OK;
4995   UDM_FINDWORD_ARGS args;
4996 
4997   /* Query->SectionList must be clean */
4998   UDM_ASSERT(!Query->SectionList.mcoords);
4999   UDM_ASSERT(!Query->SectionList.ncoords);
5000   UDM_ASSERT(!Query->SectionList.Coord);
5001   UDM_ASSERT(!Query->SectionList.msections);
5002   UDM_ASSERT(!Query->SectionList.nsections);
5003   UDM_ASSERT(!Query->SectionList.Section);
5004 
5005   UDM_GETLOCK(A, UDM_LOCK_DB);
5006   {
5007     if (UDM_OK != (rc= UdmSearchParamInit(&args, A, Query, db)) &&
5008         UdmDBSQLError(db)[0])
5009     {
5010       UdmEnvCopyErrMsgFromDB(A->Conf, db);
5011     }
5012 
5013 
5014     always_found_word= UdmVarListFindStr(&A->Conf->Vars, "AlwaysFoundWord", NULL);
5015     fl= UdmVarListFindStr(&A->Conf->Vars, "fl", UdmVarListFindStr(UdmSQLDBVars(db), "fl", ""));
5016   }
5017   UDM_RELEASELOCK(A, UDM_LOCK_DB);
5018   if (rc != UDM_OK)
5019     goto ret;
5020 
5021   if ((UdmSQLDBMode(db) == UDM_SQLDBMODE_BLOB && args.where) || fl[0])
5022   {
5023     if (UDM_OK != UdmFindLoadLimits(A, db, Query, &args, fl))
5024       goto ret;
5025 
5026     if (args.urls.empty)
5027       goto ret;
5028   }
5029 
5030 
5031   if (UDM_OK != (rc= UdmSQLDBModeHandler(db)->InitSearch(A, db, Query, &args)))
5032     goto ret;
5033 
5034   if (UDM_OK != (rc= UdmFindWordsFetch(A, db, Query, &args, always_found_word)))
5035     goto ret;
5036 
5037   if (UDM_OK != (rc= UdmMergeWords(A, db, &args, &Query->SectionList)))
5038     goto ret;
5039 
5040   if (UDM_OK != (rc= UdmSortAndGroupByURL(A, Query, &Query->SectionList, db)))
5041     goto ret;
5042 
5043   /*
5044      We cannot add collation matches before
5045      UdmSortAndGroupByURL - to use optimized groupping
5046      functions when WWList->nwords==1
5047   */
5048   if (args.CollationMatches.nwords)
5049   {
5050     size_t i;
5051     UdmWideWordListSort(&args.CollationMatches);
5052     for (i= 0; i < args.CollationMatches.nwords; i++)
5053     {
5054       UdmWideWordListAdd(&Query->Res.WWList, &args.CollationMatches.Word[i]);
5055     }
5056   }
5057 
5058 ret:
5059   UdmSearchParamFree(&args);
5060   return rc;
5061 }
5062 
5063 
5064 static udm_rc_t
UdmFindWordsSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5065 UdmFindWordsSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5066 {
5067   udm_rc_t rc;
5068   if (UDM_OK != (rc= UdmQueryCacheGetSQL(A, db, Query)))
5069     return rc;
5070   if (!Query->URLData.nitems)
5071   {
5072     rc= UdmFindWordsSQLNoCached(A, db, Query);
5073     if (rc == UDM_OK && Query->URLData.nitems)
5074       rc= UdmQueryCachePutSQL(A, db, Query);
5075   }
5076   return rc;
5077 }
5078 
5079 /****************** Track ***********************************/
5080 
5081 static udm_rc_t
UdmTrackSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5082 UdmTrackSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5083 {
5084   UDM_VARLIST Vars;
5085   char    *qbuf;
5086   char    *text_escaped;
5087   udm_bool_t trackquery= UdmVarListFindBool(UdmSQLDBVars(db), "trackquery", UDM_FALSE);
5088   const char  *words= UdmVarListFindStr(&A->Conf->Vars,"q",""); /* "q-lc" was here */
5089   const char      *IP = UdmVarListFindStr(&A->Conf->Vars, "IP", "");
5090   size_t          i, escaped_len, qbuf_len;
5091   int             qtime, rec_id;
5092   udm_rc_t rc;
5093   const char      *qu = (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "";
5094 
5095   if (!trackquery)
5096     return UDM_OK;
5097 
5098   if (*words == '\0') return UDM_OK; /* do not store empty queries */
5099 
5100   escaped_len = 4 * strlen(words);
5101   qbuf_len = escaped_len + 4096;
5102 
5103   if ((qbuf = (char*)UdmMalloc(qbuf_len)) == NULL) return UDM_ERROR;
5104   if ((text_escaped = (char*)UdmMalloc(escaped_len)) == NULL)
5105   {
5106     UDM_FREE(qbuf);
5107     return UDM_ERROR;
5108   }
5109 
5110   UdmVarListInit(&Vars);
5111   UdmVarListSQLEscape(A, &Vars, &A->Conf->Vars, db);
5112 
5113   /* Escape text to track it  */
5114   UdmDBSQLEscStr(A, db, text_escaped, words, strlen(words)); /* query for tracking */
5115 
5116   if (UdmSQLDBType(db) == UDM_DB_IBASE ||
5117       UdmSQLDBType(db) == UDM_DB_MIMER ||
5118       UdmSQLDBType(db) == UDM_DB_ORACLE8)
5119   {
5120     const char *next;
5121     switch (UdmSQLDBType(db))
5122     {
5123       case UDM_DB_IBASE: next= "SELECT GEN_ID(qtrack_GEN,1) FROM rdb$database"; break;
5124       case UDM_DB_MIMER: next= "SELECT NEXT_VALUE OF qtrack_GEN FROM system.onerow"; break;
5125       case UDM_DB_ORACLE8: next= "SELECT qtrack_seq.nextval FROM dual"; break;
5126       default: next= NULL; /* Make compiler happy */
5127     }
5128     if (UDM_OK != (rc= UdmDBSQLQueryOneRowInt(A, db, &rec_id, next)))
5129       goto UdmTrack_exit;
5130     udm_snprintf(qbuf, qbuf_len - 1,
5131                  "INSERT INTO qtrack (rec_id,ip,qwords,qtime,wtime,nfound) "
5132                  "VALUES "
5133                  "(%d,'%s','%s',%d,%d,%d)",
5134                  rec_id, IP, text_escaped, qtime= (int)time(NULL),
5135                  UdmVarListFindInt(&Query->Res.Vars, "SearchTime", 0),
5136                  (int) UdmResultTotalFound(&Query->Res));
5137     if (UDM_OK != (rc = UdmDBSQLQuery(A, db, NULL, qbuf)))
5138       goto UdmTrack_exit;
5139   }
5140   else
5141   {
5142     udm_snprintf(qbuf, qbuf_len - 1,
5143                  "INSERT INTO qtrack (ip,qwords,qtime,wtime,nfound) "
5144                  "VALUES "
5145                  "('%s','%s',%d,%d,%d)",
5146                  IP, text_escaped, qtime= (int)time(NULL),
5147                  UdmVarListFindInt(&Query->Res.Vars, "SearchTime", 0),
5148                  (int) UdmResultTotalFound(&Query->Res));
5149 
5150     if (UDM_OK != (rc= UdmDBSQLQuery(A, db, NULL, qbuf)))
5151       goto UdmTrack_exit;
5152 
5153     if (UdmSQLDBType(db) == UDM_DB_MYSQL)
5154       udm_snprintf(qbuf, qbuf_len - 1, "SELECT last_insert_id()");
5155     else
5156       udm_snprintf(qbuf, qbuf_len - 1, "SELECT rec_id FROM qtrack WHERE ip='%s' AND qtime=%d", IP, qtime);
5157     if (UDM_OK != (rc= UdmDBSQLQueryOneRowInt(A, db, &rec_id, qbuf)))
5158       goto UdmTrack_exit;
5159   }
5160 
5161   for (i = 0; i < Vars.nvars; i++)
5162   {
5163     const UDM_VAR *Var= UdmVarListFindConstByIndex(&Vars, i);
5164     const char *name= UdmVarName(Var);
5165     if (!strncasecmp(name, "query.",6) &&
5166         strcasecmp(name, "query.q") &&
5167         strcasecmp(name, "query.BrowserCharset") &&
5168         strcasecmp(name, "query.IP") &&
5169         UdmVarStr(Var) != NULL && UdmVarStr(Var)[0] != '\0')
5170     {
5171       udm_snprintf(qbuf, qbuf_len,
5172                    "INSERT INTO qinfo (q_id,sname,sval) "
5173                    "VALUES "
5174                    "(%s%i%s,'%s','%s')",
5175                    qu, rec_id, qu, name + 6, UdmVarStr(Var));
5176       rc= UdmDBSQLQuery(A, db, NULL, qbuf);
5177       if (rc != UDM_OK) goto UdmTrack_exit;
5178     }
5179   }
5180 UdmTrack_exit:
5181   UdmVarListFree(&Vars);
5182   UDM_FREE(text_escaped);
5183   UDM_FREE(qbuf);
5184   return rc;
5185 }
5186 
5187 
5188 /********************* Adding URLInfo to Res *********************/
5189 
UpdateShows(UDM_AGENT * A,UDM_DB * db,urlid_t url_id)5190 static udm_rc_t UpdateShows(UDM_AGENT *A, UDM_DB *db, urlid_t url_id)
5191 {
5192   char qbuf[64];
5193   udm_snprintf(qbuf, sizeof(qbuf), "UPDATE url SET shows = shows + 1 WHERE rec_id = %s%i%s",
5194                (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "",
5195                url_id,
5196                (UdmSQLDBType(db) == UDM_DB_PGSQL) ? "'" : "");
5197   return UdmDBSQLQuery(A, db, NULL, qbuf);
5198 }
5199 
/*
  Add one row of an SQL result to a variable list:
  column 1 is used as the name and column 2 as the value.
*/
static void SQLResToSection(UDM_SQLRES *R, UDM_VARLIST *S, size_t row)
{
  enum { NAME_COLUMN= 1, VALUE_COLUMN= 2 };
  const char *section_name= UdmSQLValue(R, row, NAME_COLUMN);
  const char *section_value= UdmSQLValue(R, row, VALUE_COLUMN);

  UdmVarListAddStr(S, section_name, section_value);
}
5206 
5207 
5208 
5209 static void
SQLResToCachedCopy(UDM_SQLRES * R,UDM_DOCUMENT * Doc,size_t max_doc_size,size_t row)5210 SQLResToCachedCopy(UDM_SQLRES *R, UDM_DOCUMENT *Doc,
5211                    size_t max_doc_size, size_t row)
5212 {
5213   const char *sval= UdmSQLValue(R, row, 1);
5214   size_t len= UdmSQLLen(R, row, 1);
5215   udm_timer_t timer= 0;
5216   UdmDocSetFromCachedHTTPResponse(Doc, sval, len, max_doc_size, &timer);
5217 }
5218 
5219 
5220 static size_t
UdmDBNum(UDM_QUERY * Query,size_t n)5221 UdmDBNum(UDM_QUERY *Query, size_t n)
5222 {
5223   UDM_URLDATA *Data= &Query->URLData.Item[n + Query->stats.first];
5224   return UDM_COORD2DBNUM(Data->score);
5225 }
5226 
5227 
5228 static udm_rc_t
UdmQueryAddURLInfoUsingIN(UDM_AGENT * Agent,UDM_DB * db,UDM_QUERY * Query,const char * qbuf,int urlinfob)5229 UdmQueryAddURLInfoUsingIN(UDM_AGENT *Agent, UDM_DB *db, UDM_QUERY *Query,
5230                           const char *qbuf, int urlinfob)
5231 {
5232   udm_rc_t rc;
5233   UDM_SQLRES SQLres;
5234   size_t j, sqlrows;
5235   size_t max_doc_size= UdmVarListFindInt(&Agent->Conf->Vars, "MaxDocSize", UDM_MAXDOCSIZE);
5236 
5237   if (UDM_OK!= (rc= UdmDBSQLQuery(Agent, db, &SQLres, qbuf)))
5238     return rc;
5239 
5240   for (sqlrows= UdmSQLNumRows(&SQLres), j=0;
5241        j< UdmResultNumRows(&Query->Res);
5242        j++)
5243   {
5244     if (&Agent->Conf->DBList.Item[UdmDBNum(Query, j)] == db)
5245     {
5246       size_t i;
5247       UDM_DOCUMENT *D= &Query->Res.Doc[j];
5248       urlid_t      url_id = UdmVarListFindInt(&D->Sections, "ID", 0);
5249       for(i = 0; i < sqlrows; i++)
5250       {
5251         if(url_id == UDM_ATOI(UdmSQLValue(&SQLres,i,0)))
5252         {
5253           if (urlinfob)
5254             SQLResToCachedCopy(&SQLres, D, max_doc_size, i);
5255           else
5256             SQLResToSection(&SQLres, &D->Sections, i);
5257         }
5258       }
5259     }
5260   }
5261   UdmSQLFree(&SQLres);
5262   return UDM_OK;
5263 }
5264 
5265 
5266 static udm_rc_t
UdmDocAddURLInfo(UDM_AGENT * A,UDM_DOCUMENT * D,UDM_DB * db,const char * qbuf)5267 UdmDocAddURLInfo(UDM_AGENT *A, UDM_DOCUMENT *D, UDM_DB *db, const char *qbuf)
5268 {
5269   UDM_SQLRES SQLres;
5270   udm_rc_t rc;
5271   size_t row;
5272 
5273   if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, qbuf)))
5274     return rc;
5275   for(row= 0; row < UdmSQLNumRows(&SQLres); row++)
5276     SQLResToSection(&SQLres, &D->Sections, row);
5277   UdmSQLFree(&SQLres);
5278   return rc;
5279 }
5280 
5281 
5282 static void
UdmDocSetPopularity(UDM_DOCUMENT * D,double pop)5283 UdmDocSetPopularity(UDM_DOCUMENT *D, double pop)
5284 {
5285   char tmp[16];
5286   udm_snprintf(tmp, sizeof(tmp), "%.5f", pop);
5287   UdmVarListReplaceStr(&D->Sections, "Pop_Rank", tmp);
5288 }
5289 
5290 
5291 typedef struct
5292 {
5293   udm_bool_t LoadURLBasicInfo;
5294   udm_bool_t LoadURLInfo;
5295   udm_bool_t LoadURLInfoBin;
5296   udm_bool_t LoadTagInfo;
5297   udm_bool_t PopRankUseShowCnt;
5298   double PopRankShowCntRatio;
5299 } UDM_RESINFOPARAM;
5300 
5301 
5302 static void
UdmResInfoParamInit(UDM_RESINFOPARAM * P,UDM_VARLIST * Vars)5303 UdmResInfoParamInit(UDM_RESINFOPARAM *P, UDM_VARLIST *Vars)
5304 {
5305   P->PopRankUseShowCnt= UdmVarListFindBool(Vars, "PopRankUseShowCnt", UDM_FALSE);
5306   P->LoadTagInfo= UdmVarListFindBool(Vars, "LoadTagInfo", UDM_FALSE);
5307   P->LoadURLInfo= UdmVarListFindBool(Vars, "LoadURLInfo", UDM_TRUE);
5308   P->PopRankShowCntRatio= UdmVarListFindDouble(Vars, "PopRankShowCntRatio", 25.0);
5309   P->LoadURLInfoBin= UdmVarListFindBool(Vars, "LoadURLInfoBin", UDM_TRUE);
5310   P->LoadURLBasicInfo= UdmVarListFindBool(Vars, "LoadURLBasicInfo", UDM_TRUE);
5311 }
5312 
5313 
5314 static udm_rc_t
UdmQueryAddDocInfoUsingLoop(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5315 UdmQueryAddDocInfoUsingLoop(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5316 {
5317   udm_rc_t rc;
5318   size_t i;
5319   UDM_SQLRES SQLres;
5320   UDM_RESINFOPARAM param;
5321   size_t max_doc_size= UdmVarListFindInt(&A->Conf->Vars, "MaxDocSize", UDM_MAXDOCSIZE);
5322 
5323   UdmResInfoParamInit(&param, &A->Conf->Vars);
5324   for(i= 0; i < UdmResultNumRows(&Query->Res); i++)
5325   {
5326     UDM_DOCUMENT *D= &Query->Res.Doc[i];
5327     urlid_t  url_id= UdmVarListFindInt(&D->Sections, "ID", 0);
5328     char  qbuf[128];
5329 
5330     if (&A->Conf->DBList.Item[UdmDBNum(Query, i)] != db)
5331       continue;
5332 
5333     udm_snprintf(qbuf, sizeof(qbuf),
5334                  "SELECT " SQLRESTODOC_COLUMNS
5335                  " FROM url WHERE rec_id=%d", url_id);
5336     if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, qbuf)))
5337       return rc;
5338 
5339     if (UdmSQLNumRows(&SQLres))
5340     {
5341       double pr;
5342       SQLResToDoc(A->Conf, D, &SQLres, 0);
5343       if (param.PopRankUseShowCnt &&
5344           (pr= atof(UdmVarListFindStr(&D->Sections, "Score", "0.0"))) >=
5345           param.PopRankShowCntRatio)
5346         UpdateShows(A, db, url_id);
5347     }
5348     UdmSQLFree(&SQLres);
5349 
5350     if (param.LoadTagInfo)
5351     {
5352       udm_snprintf(qbuf, sizeof(qbuf),
5353                    "SELECT u.rec_id, 'tag', tag FROM url u, server s "
5354                    "WHERE  u.rec_id=%d AND u.server_id=s.rec_id", url_id);
5355       if(UDM_OK != (rc= UdmDocAddURLInfo(A, D, db, qbuf)))
5356         return rc;
5357     }
5358 
5359     if (param.LoadURLInfo)
5360     {
5361       sprintf(qbuf,"SELECT url_id,sname,sval FROM urlinfo WHERE url_id=%i", url_id);
5362       if(UDM_OK != (rc= UdmDocAddURLInfo(A, D, db, qbuf)))
5363         return rc;
5364     }
5365 
5366     if (param.LoadURLInfoBin)
5367     {
5368       sprintf(qbuf,"SELECT url_id,content FROM cachedcopy WHERE url_id=%i", url_id);
5369       if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, qbuf)))
5370         return rc;
5371       if (UdmSQLNumRows(&SQLres))
5372         SQLResToCachedCopy(&SQLres, D, max_doc_size, 0);
5373     }
5374   }
5375   return UDM_OK;
5376 }
5377 
5378 
5379 static udm_rc_t
UdmQueryAddDocInfoUsingIN(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5380 UdmQueryAddDocInfoUsingIN(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5381 {
5382   udm_rc_t rc= UDM_OK;
5383   size_t i, j, sqlrows;
5384   UDM_SQLRES SQLres;
5385   UDM_RESINFOPARAM param;
5386   UDM_DSTR  in_list, qq;
5387   const char *hi_priority= UdmSQLDBType(db) == UDM_DB_MYSQL ? "HIGH_PRIORITY" : "";
5388 
5389   UdmResInfoParamInit(&param, &A->Conf->Vars);
5390 
5391   UdmDSTRInit(&in_list, 1024);
5392   UdmDSTRInit(&qq, 1024);
5393 
5394   /* Compose IN string and set to zero url_id field */
5395   for(i=0; i < UdmResultNumRows(&Query->Res); i++)
5396   {
5397     if (&A->Conf->DBList.Item[UdmDBNum(Query, i)] == db)
5398     {
5399       const char *comma= UdmDSTRLength(&in_list) ? "," : "";
5400       const char *squot= UdmSQLDBType(db) == UDM_DB_PGSQL ? "'" : "";
5401       UdmDSTRAppendf(&in_list, "%s%s%i%s", comma, squot,
5402               UdmVarListFindInt(&Query->Res.Doc[i].Sections, "ID", 0), squot);
5403     }
5404   }
5405 
5406   if (!UdmDSTRLength(&in_list))
5407     goto ret_in;
5408 
5409   if (param.LoadURLBasicInfo)
5410   {
5411     UdmDSTRReset(&qq);
5412     UdmDSTRAppendf(&qq,
5413                    "SELECT %s " SQLRESTODOC_COLUMNS
5414                    " FROM url WHERE rec_id IN (%s)",
5415                    hi_priority, UdmDSTRPtr(&in_list));
5416     if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLres, UdmDSTRPtr(&qq))))
5417       goto ret_in;
5418 
5419     for (sqlrows= UdmSQLNumRows(&SQLres), j=0;
5420          j < UdmResultNumRows(&Query->Res);
5421          j++)
5422     {
5423       if (&A->Conf->DBList.Item[UdmDBNum(Query, j)] == db)
5424       {
5425         UDM_DOCUMENT *D= &Query->Res.Doc[j];
5426         urlid_t      url_id= UdmVarListFindInt(&D->Sections, "ID", 0);
5427         for(i= 0; i < sqlrows; i++)
5428         {
5429           if (url_id == UDM_ATOI(UdmSQLValue(&SQLres,i,0)))
5430           {
5431             double pr;
5432             SQLResToDoc(A->Conf, D, &SQLres, i);
5433             if (param.PopRankUseShowCnt &&
5434                 (pr= atof(UdmVarListFindStr(&D->Sections, "Score", "0.0"))) >=
5435                 param.PopRankShowCntRatio)
5436               UpdateShows(A, db, url_id);
5437             break;
5438           }
5439         }
5440       }
5441    }
5442    UdmSQLFree(&SQLres);
5443   }
5444 
5445   if (param.LoadTagInfo)
5446   {
5447     UdmDSTRReset(&qq);
5448     UdmDSTRAppendf(&qq,
5449                    "SELECT u.rec_id, 'tag', tag FROM url u, server s "
5450                    "WHERE  u.rec_id in (%s) AND u.server_id=s.rec_id",
5451                    UdmDSTRPtr(&in_list));
5452     if (UDM_OK != (rc= UdmQueryAddURLInfoUsingIN(A, db, Query,
5453                                                  UdmDSTRPtr(&qq), 0)))
5454       return rc;
5455   }
5456 
5457   if (param.LoadURLInfo)
5458   {
5459     UdmDSTRReset(&qq);
5460     UdmDSTRAppendf(&qq,
5461                    "SELECT url_id,sname,sval "
5462                    "FROM urlinfo WHERE url_id IN (%s)",
5463                    UdmDSTRPtr(&in_list));
5464     if (UDM_OK != (rc= UdmQueryAddURLInfoUsingIN(A, db, Query,
5465                                                  UdmDSTRPtr(&qq), 0)))
5466       return rc;
5467   }
5468 
5469   if (param.LoadURLInfoBin)
5470   {
5471     UdmDSTRReset(&qq);
5472     UdmDSTRAppendf(&qq,
5473                    "SELECT url_id,content "
5474                    "FROM cachedcopy WHERE url_id IN (%s)",
5475                    UdmDSTRPtr(&in_list));
5476     if (UDM_OK != (rc= UdmQueryAddURLInfoUsingIN(A, db, Query,
5477                                                  UdmDSTRPtr(&qq), 1)))
5478       return rc;
5479   }
5480 
5481 ret_in:
5482   UdmDSTRFree(&in_list);
5483   UdmDSTRFree(&qq);
5484   return rc;
5485 }
5486 
5487 
5488 static udm_rc_t
UdmQueryAddDocInfoSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5489 UdmQueryAddDocInfoSQL(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5490 {
5491   size_t i;
5492   udm_rc_t rc;
5493   UDM_RESINFOPARAM param;
5494 
5495   if (!UdmResultNumRows(&Query->Res))
5496     return UDM_OK;
5497 
5498   UdmResInfoParamInit(&param, &A->Conf->Vars);
5499 
5500   if (param.PopRankUseShowCnt)
5501     UdmLog(A, UDM_LOG_DEBUG, "use_showcnt: %d  ratio: %f",
5502            param.PopRankUseShowCnt, param.PopRankShowCntRatio);
5503 
5504   for (i= 0; i < UdmResultNumRows(&Query->Res); i++)
5505   {
5506     UDM_URLDATA *Data= &Query->URLData.Item[i + Query->stats.first];
5507     UdmVarListReplaceInt(&Query->Res.Doc[i].Sections, "id", Data->url_id);
5508     UdmDocSetPopularity(&Query->Res.Doc[i], Data->pop_rank);
5509   }
5510 
5511   rc= UdmSQLDBHaveIn(db) ?
5512       UdmQueryAddDocInfoUsingIN(A, db, Query) :
5513       UdmQueryAddDocInfoUsingLoop(A, db, Query);
5514 
5515   return rc;
5516 }
5517 
5518 
5519 /************************* Misc *******************************************/
5520 
5521 
5522 static udm_rc_t
UdmExportSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)5523 UdmExportSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
5524 {
5525   UDM_SQLRES SQLRes;
5526   udm_rc_t rc;
5527   UDM_STR row[24];
5528 
5529   /* TODO34: add well-formed output */
5530   printf("<database>\n");
5531   printf("<urlList>\n");
5532   rc= UdmDBSQLExecDirect(Indexer, db, &SQLRes,
5533                          "SELECT rec_id,status,docsize,next_index_time,"
5534                                  "last_mod_time,referrer,hops,crc32,seed,"
5535                                  "bad_since_time,server_id,shows,"
5536                                  "url"
5537                          " FROM url");
5538   if (rc != UDM_OK) return(rc);
5539   while (UdmDBSQLFetchRow(Indexer, db, &SQLRes, row) == UDM_OK)
5540   {
5541     printf(
5542       "<url "
5543       "rec_id=\"%s\" "
5544       "status=\"%s\" "
5545       "docsize=\"%s\" "
5546       "next_index_time=\"%s\" "
5547       "last_mod_time=\"%s\" "
5548       "referrer=\"%s\" "
5549       "hops=\"%s\" "
5550       "crc32=\"%s\" "
5551       "seed=\"%s\" "
5552       "bad_since_time=\"%s\" "
5553       "server_id=\"%s\" "
5554       "shows=\"%s\" "
5555       "url=\"%s\" "
5556       "/>\n",
5557       row[0].str, row[1].str, row[2].str, row[3].str,
5558       row[4].str, row[5].str, row[6].str, row[7].str,
5559       row[8].str, row[9].str, row[10].str, row[11].str,
5560       row[12].str);
5561   }
5562   UdmSQLFree(&SQLRes);
5563   printf("</urlList>\n");
5564 
5565   printf("<linkList>\n");
5566   if (UDM_OK != (rc= UdmDBSQLExecDirect(Indexer, db, &SQLRes,
5567                  "SELECT url_id,seed,url,src,rel,linktext FROM links")))
5568     return rc;
5569   while (UdmDBSQLFetchRow(Indexer, db, &SQLRes, row) == UDM_OK)
5570   {
5571     printf(
5572       "<link "
5573       "url_id=\"%s\" "
5574       "seed=\"%s\" "
5575       "url=\"%s\" "
5576       "src=\"%s\" "
5577       "rel=\"%s\" "
5578       "linktext=\"%s\" "
5579       "/>\n",
5580       row[0].str, row[1].str, row[2].str,
5581       row[3].str, row[4].str, row[5].str);
5582   }
5583   UdmSQLFree(&SQLRes);
5584   printf("</linkList>\n");
5585 
5586   printf("</database>\n");
5587   return(0);
5588 }
5589 
5590 
5591 static udm_rc_t
UdmDocPerSite(UDM_AGENT * A,UDM_DOCUMENT * D,UDM_DB * db)5592 UdmDocPerSite(UDM_AGENT *A, UDM_DOCUMENT *D, UDM_DB *db)
5593 {
5594   char qbuf[1024];
5595   const char *s, *hostinfo= UdmVarListFindStr(&D->Sections, "Hostinfo", NULL);
5596   udm_rc_t rc;
5597   int num, prevnum= UdmVarListFindInt(&D->Sections, "DocPerSite", 0);
5598   UDM_SQLRES SQLRes;
5599 
5600   if (!hostinfo)
5601     return UDM_OK;
5602 
5603   for (s= hostinfo; s[0]; s++)
5604   {
5605     /*
5606       Host name good characters: digits, letters, hyphen (-).
5607       Just check the worst characters.
5608     */
5609     if (*s == '\'' || *s == '\"')
5610     {
5611       num= 1000000;
5612       goto ret;
5613     }
5614   }
5615   udm_snprintf(qbuf, sizeof(qbuf),
5616                "SELECT COUNT(*) FROM url WHERE url LIKE '%s%%'", hostinfo);
5617 
5618   if (UDM_OK!= (rc= UdmDBSQLQuery(A, db, &SQLRes, qbuf)))
5619     return rc;
5620   num= prevnum + atoi(UdmSQLValue(&SQLRes, 0, 0));
5621   UdmSQLFree(&SQLRes);
5622 ret:
5623   UdmVarListReplaceInt(&D->Sections, "DocPerSite", num);
5624   return UDM_OK;
5625 }
5626 
5627 
5628 static udm_rc_t
UdmImportSection(UDM_AGENT * A,UDM_DOCUMENT * D,UDM_DB * db)5629 UdmImportSection(UDM_AGENT *A, UDM_DOCUMENT *D, UDM_DB *db)
5630 {
5631   UDM_CONST_TEXTITEM ConstItem;
5632   UDM_TEXT_PARAM Param;
5633   UDM_VARLIST Vars;
5634   UDM_SQLRES SQLRes;
5635   UDM_DSTR d;
5636   udm_rc_t rc;
5637   size_t row, rows, cols;
5638   const char *fmt= UdmVarListFindStr(&D->Sections, "SQLImportSection", NULL);
5639 
5640   if (!fmt)
5641     return UDM_OK;
5642 
5643   UdmDSTRInit(&d, 1024);
5644   UdmVarListInit(&Vars);
5645   UdmVarListSQLEscape(A, &Vars, &D->Sections, db);
5646   UdmDSTRParse(&d, fmt, &Vars);
5647   UdmVarListFree(&Vars);
5648   if(UDM_OK!= (rc= UdmDBSQLQuery(A, db, &SQLRes, UdmDSTRPtr(&d))))
5649     return rc;
5650 
5651   cols= UdmSQLNumCols(&SQLRes);
5652   UdmConstTextItemInit(&ConstItem);
5653   UdmTextParamInit(&Param, UDM_TEXTLIST_FLAG_HTML, 0); /* TODO34: indexer.conf: format */
5654   for (row=0, rows= UdmSQLNumRows(&SQLRes); row < rows; row++)
5655   {
5656     size_t col;
5657     for (col= 0; col + 1 < cols; col+= 2)
5658     {
5659       const UDM_VAR *Sec;
5660       ConstItem.section_name.str= UdmSQLValue(&SQLRes, row, col);
5661       ConstItem.section_name.length= UdmSQLLen(&SQLRes, row, col);
5662       if ((Sec= UdmVarListFind(&D->Sections, ConstItem.section_name.str)))
5663       {
5664         ConstItem.text.str= UdmSQLValue(&SQLRes, row, col + 1);
5665         ConstItem.text.length= UdmSQLLen(&SQLRes, row, col + 1);
5666         Param.secno= UdmVarSecno(Sec);
5667         UdmTextListAddConst(&D->TextList, &ConstItem, &Param);
5668       }
5669     }
5670   }
5671 
5672   UdmDSTRFree(&d);
5673   UdmSQLFree(&SQLRes);
5674   return rc;
5675 }
5676 
5677 
5678 static udm_rc_t
UdmGetReferers(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)5679 UdmGetReferers(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
5680 {
5681   size_t    i,j;
5682   char    qbuf[2048];
5683   UDM_SQLRES  SQLres;
5684   const char  *where;
5685   udm_rc_t rc;
5686 
5687   UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_DB);
5688   if (UDM_OK != (rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where)))
5689     return rc;
5690 
5691   udm_snprintf(qbuf, sizeof(qbuf),
5692                "SELECT url.status,url2.url,url.url "
5693                "FROM url,url url2%s "
5694                "WHERE url.referrer=url2.rec_id %s %s",
5695                Query->from, where[0] ? "AND" : "", where);
5696 
5697   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
5698     return rc;
5699 
5700   j=UdmSQLNumRows(&SQLres);
5701   for(i=0;i<j;i++)
5702   {
5703     if(Indexer->Conf->RefInfo)Indexer->Conf->RefInfo(
5704       atoi(UdmSQLValue(&SQLres,i,0)),
5705       UdmSQLValue(&SQLres,i,2),
5706       UdmSQLValue(&SQLres,i,1)
5707     );
5708   }
5709   UdmSQLFree(&SQLres);
5710   return rc;
5711 }
5712 
5713 
5714 static udm_rc_t
UdmGetDocCount(UDM_AGENT * Indexer,UDM_DB * db)5715 UdmGetDocCount(UDM_AGENT * Indexer, UDM_DB *db)
5716 {
5717   char    qbuf[200]="";
5718   UDM_SQLRES  SQLres;
5719   udm_rc_t rc;
5720 
5721   sprintf(qbuf,NDOCS_QUERY);
5722   if (UDM_OK!= (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
5723     return rc;
5724 
5725   if(UdmSQLNumRows(&SQLres))
5726   {
5727     const char * s;
5728     s=UdmSQLValue(&SQLres,0,0);
5729     if(s)Indexer->doccount += atoi(s);
5730   }
5731   UdmSQLFree(&SQLres);
5732   return(UDM_OK);
5733 }
5734 
5735 
5736 static udm_rc_t
UdmStatActionSQL(UDM_AGENT * Indexer,UDM_DB * db,UDM_QUERY * Query)5737 UdmStatActionSQL(UDM_AGENT *Indexer, UDM_DB *db, UDM_QUERY *Query)
5738 {
5739   size_t    i,j,n;
5740   char    qbuf[2048];
5741   UDM_SQLRES  SQLres;
5742   int    have_group= (UdmSQLDBFlags(db) & UDM_SQL_HAVE_GROUPBY);
5743   const char  *where;
5744   udm_rc_t rc= UDM_OK;
5745 
5746   if(UdmSQLDBType(db)==UDM_DB_IBASE)
5747     have_group=0;
5748 
5749   UDM_LOCK_CHECK_OWNER(Indexer, UDM_LOCK_DB);
5750   if (UDM_OK != (rc= UdmSQLBuildWhereCondition(Indexer, db, Query, &where)))
5751     return rc;
5752 
5753   if(have_group)
5754   {
5755     char func[128];
5756     int time= (int) Query->StatList.time; /* To use %d on 64bit OSs */
5757 
5758     switch(UdmSQLDBType(db))
5759     {
5760       case UDM_DB_MYSQL:
5761         udm_snprintf(func, sizeof(func), "next_index_time<=%d", time);
5762         break;
5763 
5764       case UDM_DB_PGSQL:
5765       case UDM_DB_MSSQL:
5766       case UDM_DB_SYBASE:
5767       case UDM_DB_DB2:
5768       case UDM_DB_SQLITE:
5769       case UDM_DB_SQLITE3:
5770       default:
5771         udm_snprintf(func, sizeof(func),
5772                      "case when next_index_time<=%d then 1 else 0 end", time);
5773         break;
5774 
5775       case UDM_DB_ACCESS:
5776         udm_snprintf(func, sizeof(func),
5777                      "IIF(next_index_time<=%d, 1, 0)", time);
5778         break;
5779 
5780       case UDM_DB_ORACLE8:
5781       case UDM_DB_SAPDB:
5782         udm_snprintf(func, sizeof(func),
5783                      "DECODE(SIGN(%d-next_index_time),-1,0,1,1)", time);
5784         break;
5785     }
5786 
5787     udm_snprintf(qbuf, sizeof(qbuf) - 1,
5788                  "SELECT status, SUM(%s), count(*) FROM url%s %s%s GROUP BY status ORDER BY status",
5789                  func, Query->from, where[0] ? "WHERE " : "", where);
5790 
5791     if (UDM_OK!= (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
5792       return rc;
5793 
5794     if ((n= UdmSQLNumRows(&SQLres)))
5795     {
5796       for (i = 0; i < n; i++)
5797       {
5798         for (j= 0; j < Query->StatList.nstats; j++)
5799         {
5800           UDM_STAT *Stat= &Query->StatList.Stat[j];
5801           if (Stat->status== atoi(UdmSQLValue(&SQLres,i,0)))
5802           {
5803             Stat->expired += atoi(UdmSQLValue(&SQLres,i,1));
5804             Stat->total += atoi(UdmSQLValue(&SQLres,i,2));
5805             break;
5806           }
5807         }
5808         if (j == Query->StatList.nstats)
5809         {
5810           UDM_STAT  *S;
5811           size_t nbytes= (Query->StatList.nstats + 1) * sizeof(Query->StatList.Stat[0]);
5812           Query->StatList.Stat= (UDM_STAT*) UdmRealloc(Query->StatList.Stat, nbytes);
5813           S= &Query->StatList.Stat[Query->StatList.nstats];
5814           S->status= atoi(UdmSQLValue(&SQLres,i,0));
5815           S->expired= atoi(UdmSQLValue(&SQLres,i,1));
5816           S->total= atoi(UdmSQLValue(&SQLres,i,2));
5817           Query->StatList.nstats++;
5818         }
5819       }
5820     }
5821     UdmSQLFree(&SQLres);
5822   }
5823   else
5824   {
5825 /*
5826   FIXME: learn how to get it from SOLID and IBASE
5827   (HAVE_IBASE || HAVE_SOLID || HAVE_VIRT )
5828 */
5829 
5830     udm_snprintf(qbuf, sizeof(qbuf) - 1,
5831                  "SELECT status,next_index_time FROM url%s %s%s ORDER BY status",
5832                  Query->from, where[0] ? "WHERE " : "", where);
5833 
5834     if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLres, qbuf)))
5835       return rc;
5836 
5837     for(i=0;i<UdmSQLNumRows(&SQLres);i++)
5838     {
5839       for (j= 0; j< Query->StatList.nstats; j++)
5840       {
5841         UDM_STAT *Stat= &Query->StatList.Stat[j];
5842         if (Stat->status == atoi(UdmSQLValue(&SQLres,i,0)))
5843         {
5844           if ((time_t) UDM_ATOU(UdmSQLValue(&SQLres, i, 1)) <=
5845               Query->StatList.time)
5846             Stat->expired++;
5847           Stat->total++;
5848           break;
5849         }
5850       }
5851       if (j == Query->StatList.nstats)
5852       {
5853         size_t nbytes= sizeof(UDM_STAT) * (Query->StatList.nstats + 1);
5854         Query->StatList.Stat= (UDM_STAT *) UdmRealloc(Query->StatList.Stat, nbytes);
5855         Query->StatList.Stat[j].status= UDM_ATOI(UdmSQLValue(&SQLres,i,0));
5856         Query->StatList.Stat[j].expired= 0;
5857         if ((time_t) UDM_ATOU(UdmSQLValue(&SQLres, i, 1)) <=
5858             Query->StatList.time)
5859           Query->StatList.Stat[j].expired++;
5860         Query->StatList.Stat[j].total=1;
5861         Query->StatList.nstats++;
5862       }
5863     }
5864     UdmSQLFree(&SQLres);
5865   }
5866   return rc;
5867 }
5868 
5869 
5870 static udm_rc_t
UdmURLInfoDumpDoc(UDM_AGENT * Indexer,UDM_DB * db,UDM_DOCUMENT * Doc,const char * table,const char * column_names)5871 UdmURLInfoDumpDoc(UDM_AGENT *Indexer,
5872                   UDM_DB *db,
5873                   UDM_DOCUMENT *Doc,
5874                   const char *table,
5875                   const char *column_names)
5876 {
5877   udm_rc_t rc;
5878   char buf[64];
5879   size_t i;
5880   UDM_SQLRES SQLRes;
5881   UDM_DSTR dbuf;
5882   urlid_t url_id= UdmVarListFindInt(&Doc->Sections, "ID", 0);
5883 
5884   udm_snprintf(buf, sizeof(buf),
5885                "SELECT %s FROM %s WHERE url_id=%d", column_names, table, url_id);
5886   if (UDM_OK != (rc= UdmDBSQLQuery(Indexer, db, &SQLRes, buf)))
5887     return rc;
5888 
5889   UdmDSTRInit(&dbuf, 256);
5890 
5891   for (i= 0; i < UdmSQLNumRows(&SQLRes); i++)
5892   {
5893     size_t j;
5894     UDM_CONST_STR col[2];
5895     const UDM_CONST_STR *c= (const UDM_CONST_STR*) &col;
5896     for (j= 0; j < UdmSQLNumCols(&SQLRes); j++)
5897     {
5898       UDM_ASSERT(j < 2);
5899       UdmConstStrSet(&col[j], UdmSQLValue(&SQLRes, i, j), UdmSQLLen(&SQLRes, i, j));
5900     }
5901     UdmDocInsertSectionsUsingEscapeBuildQuery(Indexer, db, table,
5902                                               0, column_names,
5903                                               c, UdmSQLNumCols(&SQLRes),
5904                                               &dbuf);
5905     printf("%s;\n", UdmDSTRPtr(&dbuf));
5906   }
5907   UdmSQLFree(&SQLRes);
5908   UdmDSTRFree(&dbuf);
5909   return UDM_OK;
5910 }
5911 
5912 
5913 static udm_rc_t
UdmDumpData(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)5914 UdmDumpData(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
5915 {
5916   char buf[256];
5917   UDM_SQLRES SQLRes;
5918   size_t i, nrows;
5919   udm_rc_t rc;
5920   const char *where;
5921   UDM_DSTR eurl;
5922 
5923   UDM_ASSERT(UdmSQLDBModeHandler(db)->DumpWordInfo != NULL);
5924   if (UDM_OK != (rc= UdmSQLBuildWhereCondition(A, db, Query, &where)))
5925     return rc;
5926   UdmDSTRInit(&eurl, 256);
5927   udm_snprintf(buf, sizeof(buf),
5928                "SELECT %s FROM url%s%s", select_url_str_for_dump,
5929                where[0] ? " WHERE " : "", where);
5930   if (UDM_OK != (rc= UdmDBSQLQuery(A, db, &SQLRes, buf)))
5931     goto ret;
5932 
5933   nrows= UdmSQLNumRows(&SQLRes);
5934 
5935   for(i= 0; i < nrows; i++)
5936   {
5937     UDM_DOCUMENT Doc;
5938     UdmDocInit(&Doc);
5939     if (UDM_OK != UdmTargetSQLResDump(A, db, &Doc, &SQLRes, i, &eurl))
5940       goto ret;
5941     if (UDM_OK != (rc= UdmURLInfoDumpDoc(A, db, &Doc, "urlinfo", "sname,sval")))
5942       goto ret;
5943     if (UDM_OK != (rc= UdmURLInfoDumpDoc(A, db, &Doc, "cachedcopy", "sval")))
5944       goto ret;
5945     if (UDM_OK != (rc= UdmSQLDBModeHandler(db)->DumpWordInfo(A, db, &Doc)))
5946       goto ret;
5947     UdmDocFree(&Doc);
5948   }
5949 
5950 ret:
5951   UdmDSTRFree(&eurl);
5952   return rc;
5953 }
5954 
5955 
5956 static udm_rc_t
UdmRestoreData(UDM_AGENT * A,UDM_DOCUMENT * Doc,UDM_DB * db)5957 UdmRestoreData(UDM_AGENT *A, UDM_DOCUMENT *Doc, UDM_DB *db)
5958 {
5959   size_t i;
5960   udm_rc_t rc;
5961 
5962   for (i= 0; i < Doc->Sections.nvars; i++)
5963   {
5964     UDM_VAR *S= UdmVarListFindByIndex(&Doc->Sections, i);
5965     if (UdmVarValueHandlerType(S) == UDM_VALUE_HANDLER_TYPE_STR)
5966     {
5967       UDM_SECTION *Sec= (UDM_SECTION *) UdmVarDataPtr(S);
5968       printf("%s[%d]=%s\n", UdmVarName(S),
5969              (int) UdmSectionLength(Sec), UdmSectionPtr(Sec));
5970     }
5971   }
5972 
5973   if (UDM_OK != (rc= UdmAddURL(A, Doc, db)))
5974     goto ex;
5975   if (UDM_OK != (rc= UdmFindURL(A, Doc, db)))
5976     goto ex;
5977   if (UDM_OK != (rc= UdmLongUpdateURL(A, Doc, db)))
5978     goto ex;
5979 
5980   printf("\n");
5981 
5982 ex:
5983   return rc;
5984 }
5985 
5986 
5987 /******* "indexer -Ewordstat" - word statistics for suggestions *************/
5988 
5989 udm_rc_t
UdmWordStatQuery(UDM_AGENT * A,UDM_DB * db,const char * src)5990 UdmWordStatQuery(UDM_AGENT *A, UDM_DB *db, const char *src)
5991 {
5992   udm_rc_t rc;
5993   UDM_SQLRES SQLRes;
5994   size_t row, rows;
5995 
5996   if (UDM_OK!= (rc= UdmDBSQLQuery(A, db, &SQLRes, src)))
5997     return rc;
5998 
5999   if (UDM_OK != (rc= UdmDBSQLLockOrBegin(A, db, "wrdstat WRITE")))
6000     return rc;
6001 
6002   rows=UdmSQLNumRows(&SQLRes);
6003   for(row=0 ; row < rows ; row++)
6004   {
6005     const char *word;
6006     int count;
6007     size_t wordlen;
6008     char snd[UDM_MAXWORDSIZE];
6009     char insert[64 + 2 * UDM_MAXWORDSIZE];
6010     /*
6011       Skip words that are longer than UDM_MAXWORDSIZE.
6012     */
6013     if ((wordlen= UdmSQLLen(&SQLRes, row, 0)) > sizeof(snd))
6014       continue;
6015     word= UdmSQLValue(&SQLRes, row, 0);
6016     count= UDM_ATOI(UdmSQLValue(&SQLRes, row, 1));
6017     UdmSoundex(A->Conf->lcs, snd, sizeof(snd), word, wordlen);
6018     if (snd[0])
6019     {
6020       udm_snprintf(insert, sizeof(insert),
6021                    "INSERT INTO wrdstat (word, snd, cnt) VALUES ('%s','%s',%d)",
6022                    word, snd, count);
6023       if (UDM_OK!= (rc= UdmDBSQLQuery(A, db, NULL, insert)))
6024         return rc;
6025     }
6026     if (((row % 1000) == 999) && row + 100 < rows)
6027     {
6028       if (UDM_OK != (rc= UdmDBSQLUnlockOrCommit(A, db)))
6029         return rc;
6030       if (UDM_OK != (rc= UdmDBSQLLockOrBegin(A, db, "wrdstat WRITE")))
6031         return rc;
6032     }
6033   }
6034   UdmSQLFree(&SQLRes);
6035   if (UDM_OK != (rc= UdmDBSQLUnlockOrCommit(A, db)))
6036     return rc;
6037   return UDM_OK;
6038 }
6039 
6040 
6041 static udm_rc_t
UdmWordStatCreate(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)6042 UdmWordStatCreate(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
6043 {
6044   udm_rc_t rc;
6045   if (UDM_OK != (rc= UdmDBSQLTableTruncateOrDelete(A, db, "wrdstat")))
6046     return rc;
6047   UDM_ASSERT(UdmSQLDB(db)->dbmode_handler->QueryAction != NULL);
6048   rc= UdmSQLDB(db)->dbmode_handler->QueryAction(A, db, Query, UDM_QUERYCMD_WORDSTAT);
6049   return rc;
6050 }
6051 
6052 /******************* create and drop ***********************/
6053 typedef struct
6054 {
6055   UDM_AGENT *Agent;
6056   FILE *infile;
6057 } UDM_CREATE_DROP_PARAM;
6058 
6059 
6060 static udm_rc_t
sqlmonprompt_create_or_drop(UDM_IOHANDLER * iohandler,udm_msg_t msgtype,const char * msg)6061 sqlmonprompt_create_or_drop(UDM_IOHANDLER *iohandler,
6062                             udm_msg_t msgtype, const char *msg)
6063 {
6064   UDM_SQLMON_PARAM *prm= (UDM_SQLMON_PARAM*) iohandler->user_data;
6065   UDM_CREATE_DROP_PARAM *prm2= (UDM_CREATE_DROP_PARAM *) prm->user_data;
6066   int level= msgtype == UDM_MSG_ERROR ? UDM_LOG_ERROR : UDM_LOG_EXTRA;
6067   if (msgtype == UDM_MSG_ERROR)
6068     UdmLog(prm2->Agent, level, "ERROR at line %d: %s",
6069            (int) prm->lineno + 1, msg);
6070   else
6071     UdmLog(prm2->Agent, level, "%s", msg);
6072   return UDM_OK;
6073 }
6074 
6075 
6076 static char *
sqlmongets_create_or_drop(UDM_IOHANDLER * iohandler,char * str,size_t size)6077 sqlmongets_create_or_drop(UDM_IOHANDLER *iohandler, char *str, size_t size)
6078 {
6079   UDM_SQLMON_PARAM *prm= (UDM_SQLMON_PARAM*) iohandler->user_data;
6080   UDM_CREATE_DROP_PARAM *prm2= (UDM_CREATE_DROP_PARAM *) prm->user_data;
6081   if (!fgets(str, size, prm2->infile))
6082     return 0;
6083   return str;
6084 }
6085 
6086 
6087 static const char*
UdmCreateOrDropCmdStr(udm_dbcmd_t cmd)6088 UdmCreateOrDropCmdStr(udm_dbcmd_t cmd)
6089 {
6090   switch(cmd)
6091   {
6092     case UDM_DBCMD_CREATE: return "create";
6093     case UDM_DBCMD_DROP:   return "drop";
6094     default: return "";
6095   }
6096   return "unknown_cmd";
6097 }
6098 
6099 
6100 static udm_rc_t
UdmCreateOrDropSQL(UDM_AGENT * A,UDM_DB * db,udm_dbcmd_t cmd)6101 UdmCreateOrDropSQL(UDM_AGENT *A, UDM_DB *db, udm_dbcmd_t cmd)
6102 {
6103   char fname[1024];
6104   const char *sdir= UdmVarListFindStr(&A->Conf->Vars, "ShareDir", UDM_SHARE_DIR);
6105 #ifdef HAVE_FHS_LAYOUT
6106   const char *sdir2= "create" UDMSLASHSTR;
6107 #else
6108   const char *sdir2= "";
6109 #endif
6110   UDM_SQLMON_PARAM prm;
6111   UDM_CREATE_DROP_PARAM prm2;
6112 
6113   udm_snprintf(fname,sizeof(fname),"%s%s%s%s%s%s.%s.sql",
6114                sdir, UDMSLASHSTR, sdir2,
6115                UdmSQLDBTypeToStr(UdmSQLDBType(db)), UDMSLASHSTR,
6116                UdmCreateOrDropCmdStr(cmd),
6117                UdmSQLDBModeToStr(UdmSQLDBMode(db)));
6118   UdmLog(A, UDM_LOG_ERROR, "Running '%s'", fname);
6119   prm2.Agent= A;
6120   if (!(prm2.infile= fopen(fname,"r")))
6121   {
6122     sprintf(A->Conf->errstr, "Can't open file '%s'", fname);
6123     return UDM_ERROR;
6124   }
6125   bzero((void*)&prm,sizeof(prm));
6126   prm.currdbnum= db - A->Conf->DBList.Item;
6127   prm.flags= UDM_SQLMON_DISPLAY_FIELDS;
6128   prm.iohandler.gets= sqlmongets_create_or_drop;
6129   prm.iohandler.prompt= sqlmonprompt_create_or_drop;
6130   prm.iohandler.user_data= &prm;
6131   prm.user_data= &prm2;
6132   UdmSQLMonitor(A, A->Conf,&prm);
6133   UdmLog(A, UDM_LOG_ERROR, "%d queries sent, %d succeeded, %d failed",
6134          (int) prm.nqueries, (int) prm.ngood, (int) prm.nbad);
6135   fclose(prm2.infile);
6136   return UDM_OK;
6137 }
6138 
6139 
6140 /******************* URL handlers **************************/
6141 
6142 static udm_rc_t
UdmDocActionSQL(UDM_AGENT * A,UDM_DB * db,UDM_DOCUMENT * Doc,udm_doccmd_t cmd)6143 UdmDocActionSQL(UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *Doc, udm_doccmd_t cmd)
6144 {
6145   switch (cmd)
6146   {
6147     case UDM_DOCCMD_DELETE:          return UdmDeleteURL(A, Doc, db);
6148     case UDM_DOCCMD_ADD:             return UdmAddURL(A, Doc, db);
6149     case UDM_DOCCMD_SUPDATE:         return UdmUpdateUrl(A, Doc, db);
6150     case UDM_DOCCMD_LUPDATE:         return UdmLongUpdateURL(A, Doc, db);
6151     case UDM_DOCCMD_DUPDATE:         return UdmDeleteWordsAndLinks(A, Doc, db);
6152     case UDM_DOCCMD_UPDCLONE:        return UdmUpdateClone(A, Doc, db);
6153     case UDM_DOCCMD_REGCHILD:        return UdmRegisterChild(A, Doc, db);
6154     case UDM_DOCCMD_FINDBYURL:       return UdmFindURL(A, Doc, db);
6155     case UDM_DOCCMD_FINDBYMSG:       return UdmFindMessage(A, Doc, db);
6156     case UDM_DOCCMD_FINDORIG:        return UdmFindOrigin(A, Doc, db);
6157     case UDM_DOCCMD_GET_CACHED_COPY: return UdmGetCachedCopy(A, Doc, db);
6158     case UDM_DOCCMD_DOCPERSITE:      return UdmDocPerSite(A, Doc, db);
6159     case UDM_DOCCMD_SQLIMPORTSEC:    return UdmImportSection(A, Doc, db);
6160     case UDM_DOCCMD_RESTOREDATA:     return UdmRestoreData(A, Doc, db);
6161   }
6162   return UDM_ERROR;
6163 }
6164 
6165 
6166 typedef struct udm_sqldb_driver_st
6167 {
6168   const char *name;
6169   udm_sqldbtype_t DBType;
6170   udm_sqldbapi_t  DBDriver;
6171   int DBSQL_IN;
6172   int flags;
6173   const UDM_SQLDB_HANDLER *handler;
6174 } UDM_SQLDB_DRIVER;
6175 
6176 
/*
  Table of the SQL backends known to this build.

  Which entries exist depends on the HAVE_xxx configuration macros.
  The table is terminated by an entry with name == NULL.

  The order of entries matters: UdmSQLDriverByName() returns the first
  match and also matches an "odbc-xxx" entry by its bare "xxx" suffix,
  so a native driver listed before the ODBC section wins over
  the corresponding ODBC entry when both are compiled in.
*/
static const UDM_SQLDB_DRIVER SQLDriver[]=
{
#if (HAVE_ORACLE8)
  {
    "oracle8", UDM_DB_ORACLE8, UDM_DBAPI_ORACLE8, 1,
    UDM_SQL_HAVE_GROUPBY   | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND |
    UDM_SQL_HAVE_ROWNUM    | UDM_SQL_HAVE_GOOD_COMMIT | UDM_SQL_HAVE_TRANSACT |
    UDM_SQL_HAVE_RENAME    | UDM_SQL_HAVE_CREATE_LIKE,
    &udm_sqldb_oracle_handler
  },
  {
    /* Alias of "oracle8": same type, API, flags and handler */
    "oracle", UDM_DB_ORACLE8, UDM_DBAPI_ORACLE8, 1,
    UDM_SQL_HAVE_GROUPBY   | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND |
    UDM_SQL_HAVE_ROWNUM    | UDM_SQL_HAVE_GOOD_COMMIT | UDM_SQL_HAVE_TRANSACT |
    UDM_SQL_HAVE_RENAME    | UDM_SQL_HAVE_CREATE_LIKE,
    &udm_sqldb_oracle_handler
  },
#endif
#if (HAVE_CTLIB)
  {
    "mssql", UDM_DB_MSSQL, UDM_DBAPI_CTLIB, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_TOP     | UDM_SQL_HAVE_0xHEX | UDM_SQL_HAVE_TRANSACT |
    UDM_SQL_HAVE_RENAME  | UDM_SQL_HAVE_CREATE_LIKE,
    &udm_sqldb_ctlib_handler
  },
  {
    "sybase", UDM_DB_SYBASE, UDM_DBAPI_CTLIB, 1,
    UDM_SQL_HAVE_GROUPBY | /*UDM_SQL_HAVE_TRUNCATE |*/
    /*
      Don't use TRUNCATE with Sybase.
      It gives the error:
      'TRUNCATE TABLE command not allowed within multi-statement
      transaction.'
      TODO: modify the code to use TRUNCATE outside a transaction
    */
    UDM_SQL_HAVE_TOP     | UDM_SQL_HAVE_0xHEX    |
    UDM_SQL_HAVE_GOOD_COMMIT | UDM_SQL_HAVE_TRANSACT
    /*
    Something goes wrong with sp_rename, so these flags stay disabled:
    UDM_SQL_HAVE_RENAME  | UDM_SQL_HAVE_CREATE_LIKE
    */
     ,
    &udm_sqldb_ctlib_handler
  },
#endif
#if (HAVE_MYSQL)
  {
    "mysql", UDM_DB_MYSQL, UDM_DBAPI_MYSQL, 1,
    UDM_SQL_HAVE_BIND  |
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_0xHEX |
    UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE |
    UDM_SQL_HAVE_DROP_IF_EXISTS,
    &udm_sqldb_mysql_handler
  },
#endif
#if (HAVE_PGSQL)
  {
    "pgsql", UDM_DB_PGSQL, UDM_DBAPI_PGSQL, 1,
    UDM_SQL_HAVE_BIND  |
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_TRANSACT |
    UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
    /* UDM_SQL_HAVE_DROP_IF_EXISTS depends on the server version */
    &udm_sqldb_pgsql_handler,
  },
#endif
#if (HAVE_IBASE)
  {
    "ibase", UDM_DB_IBASE, UDM_DBAPI_IBASE, 0,
    /*
    DBSQL_IN is 0 here: while indexing large sites and using
    the SQL IN statement, Interbase fails when the IN list has
    more than 1500 items. It would be better to fix the code to avoid
    big INs instead of hiding DBSQL_IN.
    */
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND |
    UDM_SQL_HAVE_FIRST_SKIP | UDM_SQL_HAVE_TRANSACT | UDM_SQL_HAVE_GOOD_COMMIT,
    &udm_sqldb_ibase_handler,
  },
#endif
#if (HAVE_SQLITE)
  {
    "sqlite",
    UDM_DB_SQLITE, UDM_DBAPI_SQLITE, 1,
    UDM_SQL_HAVE_BIND  |
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRANSACT,
    &udm_sqldb_sqlite_handler,
  },
#endif
#if (HAVE_SQLITE3)
  {
    "sqlite3",
    UDM_DB_SQLITE3, UDM_DBAPI_SQLITE3, 1,
    UDM_SQL_HAVE_BIND   |
    UDM_SQL_HAVE_LIMIT  | UDM_SQL_HAVE_GROUPBY |
    UDM_SQL_HAVE_STDHEX | UDM_SQL_HAVE_GOOD_COMMIT | UDM_SQL_HAVE_TRANSACT |
    UDM_SQL_HAVE_DROP_IF_EXISTS | UDM_SQL_HAVE_RENAME |
    UDM_SQL_HAVE_CREATE_LIKE,
    &udm_sqldb_sqlite3_handler,
  },
#endif
#if (HAVE_ODBC)
  /* All entries below share udm_sqldb_odbc_handler and UDM_DBAPI_ODBC */
  {
    "odbc-solid", UDM_DB_SOLID, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_TRANSACT,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-sapdb", UDM_DB_SAPDB, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_TRANSACT,
    &udm_sqldb_odbc_handler,
  },
  {
    /* UDM_SQL_HAVE_BIND is listed twice below; harmless in a bitwise OR */
    "odbc-db2", UDM_DB_DB2, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND |
    UDM_SQL_HAVE_BIND    | UDM_SQL_HAVE_STDHEX | UDM_SQL_HAVE_TRANSACT |
    UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-access", UDM_DB_ACCESS, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT |
    UDM_SQL_HAVE_0xHEX   | UDM_SQL_HAVE_TRANSACT,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-mimer", UDM_DB_MIMER, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT |
    UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_STDHEX | UDM_SQL_HAVE_TRANSACT,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-cache", UDM_DB_CACHE, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT |
    UDM_SQL_HAVE_BIND    | UDM_SQL_HAVE_TRANSACT,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-virtuoso", UDM_DB_VIRT, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT |
    UDM_SQL_HAVE_BIND    | UDM_SQL_HAVE_TRANSACT | UDM_SQL_HAVE_TOP,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-oracle", UDM_DB_ORACLE8, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY   | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND |
    UDM_SQL_HAVE_ROWNUM    | UDM_SQL_HAVE_GOOD_COMMIT |
    UDM_SQL_HAVE_TRANSACT  |
    UDM_SQL_HAVE_RENAME    | UDM_SQL_HAVE_CREATE_LIKE,
    &udm_sqldb_odbc_handler,
  },
  {
    /*
      NOTE(review): unlike "odbc-oracle" above, this entry does not set
      UDM_SQL_HAVE_GOOD_COMMIT - confirm whether that is intentional.
    */
    "odbc-oracle8", UDM_DB_ORACLE8, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY   | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_BIND | UDM_SQL_HAVE_ROWNUM |
    UDM_SQL_HAVE_TRANSACT  |
    UDM_SQL_HAVE_RENAME    | UDM_SQL_HAVE_CREATE_LIKE,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-mssql", UDM_DB_MSSQL, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_TRUNCATE |
    UDM_SQL_HAVE_TOP     | UDM_SQL_HAVE_0xHEX | UDM_SQL_HAVE_TRANSACT |
    UDM_SQL_HAVE_RENAME  | UDM_SQL_HAVE_CREATE_LIKE,
    &udm_sqldb_odbc_handler,
  },
  {
    /* TRUNCATE stays disabled here, as in the native "sybase" entry */
    "odbc-sybase", UDM_DB_SYBASE, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | /*UDM_SQL_HAVE_TRUNCATE |*/
    UDM_SQL_HAVE_TOP     | UDM_SQL_HAVE_0xHEX |
    UDM_SQL_HAVE_TRANSACT| UDM_SQL_HAVE_GOOD_COMMIT,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-mysql", UDM_DB_MYSQL, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_BIND  |
    UDM_SQL_HAVE_LIMIT | UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_0xHEX |
    UDM_SQL_HAVE_RENAME | UDM_SQL_HAVE_CREATE_LIKE |
    UDM_SQL_HAVE_DROP_IF_EXISTS,
    &udm_sqldb_odbc_handler,
  },
  {
    /* Bind does not seem to work with BYTEA in Windows */
    "odbc-pgsql", UDM_DB_PGSQL, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_LIMIT      | UDM_SQL_HAVE_GROUPBY |
    UDM_SQL_HAVE_SUBSELECT  /*| UDM_SQL_HAVE_BIND*/|
    UDM_SQL_HAVE_TRANSACT,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-ibase", UDM_DB_IBASE, UDM_DBAPI_ODBC, 0,
    /*
    DBSQL_IN is 0 here: while indexing large sites and using
    the SQL IN statement, Interbase fails when the IN list has
    more than 1500 items. It would be better to fix the code to avoid
    big INs instead of hiding DBSQL_IN.
    */
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_BIND |
    UDM_SQL_HAVE_FIRST_SKIP | UDM_SQL_HAVE_TRANSACT | UDM_SQL_HAVE_GOOD_COMMIT,
    &udm_sqldb_odbc_handler,
  },
  {
    "odbc-monetdb", UDM_DB_MONETDB, UDM_DBAPI_ODBC, 1,
    UDM_SQL_HAVE_GROUPBY | UDM_SQL_HAVE_SUBSELECT | UDM_SQL_HAVE_LIMIT |
    UDM_SQL_HAVE_BIND |
    /*UDM_SQL_HAVE_BIND_TEXT | */UDM_SQL_HAVE_BLOB_AS_HEX | UDM_SQL_HAVE_TRANSACT,
    &udm_sqldb_odbc_handler,
  },
#endif
  {
    /* End-of-table marker: only name == NULL is tested by the lookup loop */
    NULL, UDM_DB_MYSQL, UDM_DBAPI_MYSQL, 0, 0, NULL
  }
};
6394 
6395 
UdmSQLDriverByName(const char * name)6396 static const UDM_SQLDB_DRIVER *UdmSQLDriverByName(const char *name)
6397 {
6398   const UDM_SQLDB_DRIVER *drv;
6399   for (drv= SQLDriver; drv->name; drv++)
6400   {
6401     if (!strcasecmp(name, drv->name))
6402       return drv;
6403     if (!strncasecmp(drv->name, "odbc-", 5) &&
6404         !strcasecmp(drv->name + 5, name))
6405       return drv;
6406   }
6407   return NULL;
6408 }
6409 
6410 
6411 static const UDM_DBMODE_HANDLER *
UdmSQLDBModeHandlerByID(int DBMode)6412 UdmSQLDBModeHandlerByID(int DBMode)
6413 {
6414   switch (DBMode)
6415   {
6416     case UDM_SQLDBMODE_BLOB:
6417       return &udm_dbmode_handler_blob;
6418     case UDM_SQLDBMODE_SINGLE:
6419       return &udm_dbmode_handler_single;
6420     case UDM_SQLDBMODE_MULTI:
6421       return &udm_dbmode_handler_multi;
6422     case UDM_SQLDBMODE_RAWBLOB:
6423       return &udm_dbmode_handler_rawblob;
6424   }
6425   UDM_ASSERT(0);
6426   return NULL;
6427 }
6428 
6429 
6430 static udm_rc_t
UdmDBSetParam(UDM_DB * db,char * param)6431 UdmDBSetParam(UDM_DB *db, char *param)
6432 {
6433   char *tok, *lt;
6434 
6435   for(tok = udm_strtok_r(param, "&",&lt) ; tok ;
6436       tok = udm_strtok_r(NULL,"&",&lt))
6437   {
6438     char * val;
6439     if((val=strchr(tok,'=')))
6440     {
6441       *val++='\0';
6442       UdmVarListReplaceStr(UdmSQLDBVars(db), tok, val);
6443     }
6444     else
6445     {
6446       UdmVarListReplaceStr(UdmSQLDBVars(db), tok, "");
6447     }
6448   }
6449   return UDM_OK;
6450 }
6451 
6452 
6453 
6454 static udm_rc_t
UdmDBSetAddrCommon(UDM_DB * db,UDM_URL * addr)6455 UdmDBSetAddrCommon(UDM_DB *db, UDM_URL *addr)
6456 {
6457   char *s;
6458   if (addr->auth)
6459   {
6460     /*
6461       Unescape user and password to allow URL specific
6462       characters like '"<>@#? to be used as user or password part.
6463 
6464       It's safe to spoil addr->auth here, as we don't
6465       need it anymore after setting DBUser and DBPass
6466     */
6467 
6468     if ((s= strchr(addr->auth,':')))
6469     {
6470       *s++= 0;
6471       UdmUnescapeCGIQuery(s, s);
6472       UdmVarListReplaceStr(UdmSQLDBVars(db), "DBPass", s);
6473     }
6474     UdmUnescapeCGIQuery(addr->auth, addr->auth);
6475     UdmVarListReplaceStr(UdmSQLDBVars(db), "DBUser", addr->auth);
6476   }
6477 
6478   UdmVarListReplaceStr(UdmSQLDBVars(db), "DBHost", addr->hostname);
6479   if (addr->port)
6480     UdmVarListReplaceInt(UdmSQLDBVars(db), "DBPort", addr->port);
6481 
6482   if((s = strchr(UDM_NULL2EMPTY(addr->filename), '?')))
6483   {
6484     *s++='\0';
6485     if (UDM_OK != UdmDBSetParam(db, s))
6486       return UDM_ERROR;
6487     UdmVarListReplaceStr(UdmSQLDBVars(db), "filename", addr->filename);
6488   }
6489   else
6490   {
6491     UdmVarListReplaceStr(UdmSQLDBVars(db), "filename", addr->filename);
6492   }
6493 
6494   return UDM_OK;
6495 }
6496 
6497 
6498 static udm_rc_t
UdmDBSetAddrSQLFromURL(UDM_DB * db,UDM_URL * addr,const char * dbaddr)6499 UdmDBSetAddrSQLFromURL(UDM_DB *db, UDM_URL *addr, const char *dbaddr)
6500 {
6501   const char *v;
6502   const UDM_SQLDB_DRIVER *drv= UdmSQLDriverByName(addr->schema);
6503   if (!drv)
6504     return UDM_NOTARGET;
6505 
6506   if (!(db->specific= UdmMalloc(sizeof(UDM_SQLDB))))
6507     return UDM_ERROR;
6508   bzero(db->specific, sizeof(UDM_SQLDB));
6509 
6510   UdmVarListReplaceStr(UdmSQLDBVars(db), "DBAddr", dbaddr);
6511 
6512   if (UdmDBSetAddrCommon(db, addr))
6513     return UDM_ERROR;
6514 
6515   UdmSQLDB(db)->DBMode= UDM_SQLDBMODE_BLOB;
6516   UdmSQL(db)->DBType= drv->DBType;
6517   UdmSQL(db)->DBDriver= drv->DBDriver;
6518   UdmSQL(db)->DBSQL_IN= drv->DBSQL_IN;
6519   UdmSQL(db)->flags= drv->flags;
6520   UdmSQL(db)->handler= drv->handler[0];
6521   db->dbhandler= &udm_dbhandler_sql;
6522 
6523   if ((v= UdmVarListFindStr(UdmSQLDBVars(db),"dbmode",NULL)))
6524   {
6525     udm_bool_t error;
6526     UdmSQLDB(db)->DBMode= UdmStr2DBMode(v, &error);
6527     if (error)
6528     {
6529       udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db), "Unknown DBMode");
6530       UDM_FREE(db->specific);
6531       return UDM_ERROR;
6532     }
6533   }
6534 
6535   UdmSQLDB(db)->dbmode_handler= UdmSQLDBModeHandlerByID(UdmSQLDBMode(db));
6536 
6537   if ((v= UdmVarListFindStr(UdmSQLDBVars(db),"dbmodesearch",NULL)))
6538   {
6539     udm_bool_t error;
6540     udm_sqldbmode_t DBMode= UdmStr2DBMode(v, &error);
6541     if (error)
6542     {
6543       udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db), "Unknown DBModeSearch");
6544       UDM_FREE(db->specific);
6545       return UDM_ERROR;
6546     }
6547     if (DBMode == UDM_SQLDBMODE_BLOB  &&
6548         UdmSQLDBType(db) != UDM_DB_MYSQL &&
6549         UdmSQLDBType(db) != UDM_DB_SYBASE &&
6550         UdmSQLDBType(db) != UDM_DB_MSSQL &&
6551         UdmSQLDBType(db) != UDM_DB_MIMER &&
6552         UdmSQLDBType(db) != UDM_DB_ORACLE8 &&
6553         UdmSQLDBType(db) != UDM_DB_DB2 &&
6554         UdmSQLDBType(db) != UDM_DB_PGSQL &&
6555         UdmSQLDBType(db) != UDM_DB_IBASE &&
6556         UdmSQLDBType(db) != UDM_DB_VIRT &&
6557         UdmSQLDBType(db) != UDM_DB_SQLITE3 &&
6558         UdmSQLDBType(db) != UDM_DB_MONETDB)
6559     {
6560       udm_snprintf(UdmDBSQLError(db), UdmDBSQLErrorSize(db),
6561                    "This DBMode=blob is not supported with this database");
6562       UDM_FREE(db->specific);
6563       return UDM_ERROR;
6564     }
6565   }
6566 
6567   if((v= UdmVarListFindStr(UdmSQLDBVars(db), "debugsql", "no")))
6568   {
6569     if (!strcasecmp(v, "yes"))
6570       UdmSQL(db)->flags|= UDM_SQL_DEBUG_QUERY;
6571   }
6572 
6573   if(UdmSQLDBDriver(db) == UDM_DBAPI_IBASE ||
6574      UdmSQLDBDriver(db) == UDM_DBAPI_SQLITE ||
6575      UdmSQLDBDriver(db) == UDM_DBAPI_SQLITE3)
6576   {
6577     /*
6578       Ibase is a special case:
6579       It's database name consists of full path and file name
6580     */
6581     UdmVarListReplaceStr(UdmSQLDBVars(db), "DBName", UDM_NULL2EMPTY(addr->path));
6582   }
6583   else
6584   {
6585     /*
6586       ODBC Data Source Names may contain space and
6587       other tricky characters, let's unescape them.
6588     */
6589     size_t len= strlen(UDM_NULL2EMPTY(addr->path));
6590     char *DBName= (char*) UdmMalloc(len + 1);
6591     char *src= (char*) UdmMalloc(len + 1);
6592     src[0]= '\0';
6593     sscanf(UDM_NULL2EMPTY(addr->path), "/%[^/]s", src);
6594     UdmUnescapeCGIQuery(DBName, src);
6595     UdmVarListReplaceStr(UdmSQLDBVars(db), "DBName", DBName);
6596     UdmFree(src);
6597     UdmFree(DBName);
6598   }
6599 
6600   if (UdmVarListFindInt(UdmSQLDBVars(db), "ps", 0) == 123)
6601   {
6602     UdmSQL(db)->handler.Prepare= UdmSQLPrepareGeneric;
6603     UdmSQL(db)->handler.Bind= UdmSQLBindGeneric;
6604     UdmSQL(db)->handler.Exec= UdmSQLExecGeneric;
6605     UdmSQL(db)->handler.StmtFree= UdmSQLStmtFreeGeneric;
6606     UdmSQL(db)->flags|= UDM_SQL_HAVE_BIND;
6607   }
6608   else if ((UdmSQLDBType(db) == UDM_DB_MSSQL   ||
6609             UdmSQLDBType(db) == UDM_DB_SYBASE  ||
6610             UdmSQLDBType(db) == UDM_DB_MYSQL   ||
6611             UdmSQLDBType(db) == UDM_DB_PGSQL   ||
6612             UdmSQLDBType(db) == UDM_DB_SQLITE  ||
6613             UdmSQLDBType(db) == UDM_DB_SQLITE3)&&
6614             !strcasecmp(UdmVarListFindStr(UdmSQLDBVars(db), "ps", ""), "none"))
6615   {
6616     UdmSQL(db)->flags&= (0x7FFFFFFF ^ UDM_SQL_HAVE_BIND);
6617   }
6618   else if ((UdmSQLDBDriver(db) == UDM_DBAPI_MYSQL ||
6619             UdmSQLDBDriver(db) == UDM_DBAPI_PGSQL ||
6620             UdmSQLDBDriver(db) == UDM_DBAPI_ODBC) &&
6621             UdmSQL(db)->handler.Exec &&
6622             UdmVarListFindBool(UdmSQLDBVars(db), "ps", UDM_FALSE))
6623   {
6624     UdmSQL(db)->flags|= UDM_SQL_HAVE_BIND;
6625   }
6626 
6627   return UDM_OK;
6628 }
6629 
6630 
6631 static udm_rc_t
UdmDBSetAddrSQL(UDM_DB * db,const char * dbaddr)6632 UdmDBSetAddrSQL(UDM_DB *db, const char *dbaddr)
6633 {
6634   udm_rc_t rc= UDM_NOTARGET;
6635   UDM_URL addr;
6636   UdmURLInit(&addr);
6637   if (!UdmURLParse(&addr, dbaddr) && addr.schema)
6638     rc= UdmDBSetAddrSQLFromURL(db, &addr, dbaddr);
6639   UdmURLFree(&addr);
6640   return rc;
6641 }
6642 
6643 
6644 static udm_rc_t
UdmDBCloseSQL(UDM_DB * db)6645 UdmDBCloseSQL(UDM_DB *db)
6646 {
6647   if (db->specific)
6648   {
6649     if (UdmSQLDBConnected(db))
6650       UdmSQLClose(UdmSQL(db));
6651     UdmWordCacheInit(&UdmSQLDB(db)->WordCache);
6652     UdmVarListFree(UdmSQLDBVars(db));
6653     UdmURLIdCacheFree(&UdmSQLDB(db)->URLIdCache);
6654     UDM_FREE(db->specific);
6655   }
6656   return UDM_OK;
6657 }
6658 
6659 
6660 static udm_rc_t
UdmDBActionSQL(UDM_AGENT * A,UDM_DB * db,udm_dbcmd_t cmd)6661 UdmDBActionSQL(UDM_AGENT *A, UDM_DB *db, udm_dbcmd_t cmd)
6662 {
6663   switch (cmd)
6664   {
6665     case UDM_DBCMD_CREATE:    return UdmCreateOrDropSQL(A, db, cmd);
6666     case UDM_DBCMD_DROP:      return UdmCreateOrDropSQL(A, db, cmd);
6667     case UDM_DBCMD_DOCCOUNT:  return UdmGetDocCount(A, db);
6668     case UDM_DBCMD_CRAWLER_FINALIZE:   return UdmWordCacheWrite(A, db, 0);
6669   }
6670   return UDM_ERROR;
6671 }
6672 
6673 
6674 static udm_rc_t
UdmDBInfoSQL(UDM_DB * db,void * dst,size_t dstlen,size_t * bytes_written,udm_dbinfo_t info)6675 UdmDBInfoSQL(UDM_DB *db, void *dst, size_t dstlen, size_t *bytes_written,
6676              udm_dbinfo_t info)
6677 {
6678   switch (info)
6679   {
6680     case UDM_DBINFO_IS_THREAD_SAFE:
6681       if (!dstlen)
6682         return UDM_ERROR;
6683       /* TODO: link against libmysqlclient_r */
6684       ((udm_bool_t *) dst)[0]= UDM_FALSE;
6685       *bytes_written= 1;
6686       return UDM_OK;
6687     case UDM_DBINFO_ERRCODE:
6688       if (dstlen < sizeof(int))
6689         return UDM_ERROR;
6690       ((int *)dst)[0]= UdmSQL(db)->errcode;
6691       *bytes_written= sizeof(int);
6692       return UDM_OK;
6693     case UDM_DBINFO_ADDR:
6694     {
6695       const char *dbaddr= UdmVarListFindStr(UdmSQLDBVars(db), "DBAddr", "<noaddr>");
6696       if (dstlen < 1)
6697         return UDM_ERROR;
6698       *bytes_written= udm_snprintf((char*) dst, dstlen, "%s", dbaddr);
6699       return UDM_OK;
6700     }
6701     case UDM_DBINFO_ERRSTR:
6702     {
6703       if (dstlen < 1)
6704         return UDM_ERROR;
6705       *bytes_written= udm_snprintf((char*) dst, dstlen, "%s", UdmDBSQLError(db));
6706       return UDM_OK;
6707     }
6708   }
6709   return UDM_ERROR;
6710 }
6711 
6712 
6713 static udm_rc_t
UdmRewriteURLData(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)6714 UdmRewriteURLData(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
6715 {
6716   udm_rc_t rc;
6717   UDM_URLID_LIST limit;
6718   bzero((void*) &limit, sizeof(limit));
6719   if (UDM_OK != (rc= UdmLoadURLDataFromURLForConv(A, db, Query, &limit)))
6720     return rc;
6721   return udm_dbmode_handler_blob.QueryAction(A, db, Query, UDM_QUERYCMD_REWRITE_URLDATA);
6722 }
6723 
6724 
6725 static udm_rc_t
UdmRewritePopularity(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query)6726 UdmRewritePopularity(UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query)
6727 {
6728   udm_rc_t rc;
6729   UDM_URLID_LIST limit;
6730   bzero((void*) &limit, sizeof(limit));
6731   if (UDM_OK != (rc= UdmLoadURLDataFromURLForConv(A, db, Query, &limit)))
6732     return rc;
6733   return udm_dbmode_handler_blob.QueryAction(A, db, Query, UDM_QUERYCMD_REWRITE_POPULARITY);
6734 }
6735 
6736 
6737 static udm_rc_t
UdmQueryActionSQL(UDM_AGENT * A,UDM_DB * db,UDM_QUERY * Query,udm_querycmd_t cmd)6738 UdmQueryActionSQL(UDM_AGENT *A, UDM_DB *db,
6739                   UDM_QUERY *Query, udm_querycmd_t cmd)
6740 {
6741   switch (cmd)
6742   {
6743     case UDM_QUERYCMD_FINDWORDS: return UdmFindWordsSQL(A, db, Query);
6744     case UDM_QUERYCMD_SUGGEST:   return UdmQuerySuggest(A, db, Query);
6745     case UDM_QUERYCMD_DOCINFO:   return UdmQueryAddDocInfoSQL(A, db, Query);
6746     case UDM_QUERYCMD_TRACK:     return UdmTrackSQL(A, db, Query);
6747     case UDM_QUERYCMD_CLONES:    return UdmQueryClones(A, db, Query);
6748     case UDM_QUERYCMD_WORDFORMS: return UdmQueryWordForms(A, db, Query);
6749     case UDM_QUERYCMD_TARGETS:   return UdmTargetsSQL(A, db, Query);
6750     case UDM_QUERYCMD_CLEAR:     return UdmClearDBSQL(A, db, Query);
6751     case UDM_QUERYCMD_INDEX:     return UdmIndexSQL(A, db, Query);
6752     case UDM_QUERYCMD_EXPORT:    return UdmExportSQL(A, db, Query);
6753     case UDM_QUERYCMD_EXPIRE:    return UdmMarkForReindex(A, db, Query);
6754     case UDM_QUERYCMD_REFERERS:  return UdmGetReferers(A, db, Query);
6755     case UDM_QUERYCMD_WORDSTAT:  return UdmWordStatCreate(A, db, Query);
6756     case UDM_QUERYCMD_DUMPDATA:  return UdmDumpData(A, db, Query);
6757     case UDM_QUERYCMD_STATISTICS:return UdmStatActionSQL(A, db, Query);
6758     case UDM_QUERYCMD_REWRITE_URLDATA:    return UdmRewriteURLData(A, db, Query);
6759     case UDM_QUERYCMD_REWRITE_POPULARITY: return UdmRewritePopularity(A, db, Query);
6760     case UDM_QUERYCMD_REWRITE_LIMITS:
6761       return udm_dbmode_handler_blob.QueryAction(A, db, Query, cmd);
6762   }
6763   return UDM_ERROR;
6764 }
6765 
6766 
/*
  The database handler of the SQL backend.
  UdmDBSetAddrSQLFromURL() installs it into db->dbhandler.

  The initializer is positional. The comment after each entry names
  the slot it fills and shows the prototype of the function stored there.
*/
const UDM_DBHANDLER udm_dbhandler_sql=
{
  UdmDBSetAddrSQL,     /* Init:           udm_rc_t (UDM_DB *db, const char *addr) */
  UdmDBCloseSQL,       /* Close:          udm_rc_t (UDM_DB *db) */
  UdmDBInfoSQL,        /* Info:           udm_rc_t (UDM_DB *db, void *dst, size_t dstlen, size_t *bytes_written, udm_dbinfo_t info) */
  UdmQueryActionSQL,   /* QueryAction:    udm_rc_t (UDM_AGENT *A, UDM_DB *db, UDM_QUERY *Query, udm_querycmd_t cmd) */
  UdmDBActionSQL,      /* DBAction:       udm_rc_t (UDM_AGENT *A, UDM_DB *db, udm_dbcmd_t cmd) */
  UdmDocActionSQL,     /* DocumentAction: udm_rc_t (UDM_AGENT *A, UDM_DB *db, UDM_DOCUMENT *D, udm_doccmd_t cmd) */
  UdmHrefActionSQL,    /* HrefAction:     udm_rc_t (UDM_AGENT *A, UDM_DB *db, UDM_HREF *H, udm_hrefcmd_t cmd) */
  UdmSrvActionSQL,     /* ServerAction:   udm_rc_t (UDM_AGENT *A, UDM_DB *db, UDM_SERVERLIST *Srv, udm_srvcmd_t cmd) */
};
6778 
6779 
6780 #endif /* HAVE_SQL */
6781